[PATCH v3 1/2] c++: implement __is_unsigned built-in trait

2023-07-07 Thread Ken Matsui via Gcc-patches
This patch implements built-in trait for std::is_unsigned.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_unsigned.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_UNSIGNED.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_unsigned.
* g++.dg/ext/is_unsigned.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |  3 ++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  4 ++
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 ++
 gcc/testsuite/g++.dg/ext/is_unsigned.C   | 47 
 5 files changed, 58 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_unsigned.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 8cf0f2d0974..ec8de87d1a1 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3751,6 +3751,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_UNION:
   inform (loc, "  %qT is not a union", t1);
   break;
+case CPTK_IS_UNSIGNED:
+  inform (loc, "  %qT is not an unsigned type", t1);
+  break;
 case CPTK_IS_AGGREGATE:
   inform (loc, "  %qT is not an aggregate", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 8b7fece0cc8..1a219243162 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -82,6 +82,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE, 
"__is_trivially_assignable", 2)
 DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, "__is_trivially_constructible", -1)
 DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
 DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
+DEFTRAIT_EXPR (IS_UNSIGNED, "__is_unsigned", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 /* FIXME Added space to avoid direct usage in GCC 13.  */
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 8fb47fd179e..2d48894d811 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12118,6 +12118,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_UNION:
   return type_code1 == UNION_TYPE;
 
+case CPTK_IS_UNSIGNED:
+  return TYPE_UNSIGNED (type1);
+
 case CPTK_IS_ASSIGNABLE:
   return is_xible (MODIFY_EXPR, type1, type2);
 
@@ -12296,6 +12299,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_ENUM:
 case CPTK_IS_UNION:
 case CPTK_IS_SAME:
+case CPTK_IS_UNSIGNED:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index f343e153e56..20bf8e6cad5 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -146,3 +146,6 @@
 #if !__has_builtin (__remove_cvref)
 # error "__has_builtin (__remove_cvref) failed"
 #endif
+#if !__has_builtin (__is_unsigned)
+# error "__has_builtin (__is_unsigned) failed"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/is_unsigned.C 
b/gcc/testsuite/g++.dg/ext/is_unsigned.C
new file mode 100644
index 000..2bb45d209a7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_unsigned.C
@@ -0,0 +1,47 @@
+// { dg-do compile { target c++11 } }
+
+#include <testsuite_tr1.h>
+
+using namespace __gnu_test;
+
+#define SA(X) static_assert((X),#X)
+#define SA_TEST_CATEGORY(TRAIT, X, expect) \
+  SA(TRAIT(X) == expect);  \
+  SA(TRAIT(const X) == expect);\
+  SA(TRAIT(volatile X) == expect); \
+  SA(TRAIT(const volatile X) == expect)
+
+SA_TEST_CATEGORY(__is_unsigned, void, false);
+
+SA_TEST_CATEGORY(__is_unsigned, bool, (bool(-1) > bool(0)));
+SA_TEST_CATEGORY(__is_unsigned, char, (char(-1) > char(0)));
+SA_TEST_CATEGORY(__is_unsigned, signed char, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned char, true);
+SA_TEST_CATEGORY(__is_unsigned, wchar_t, (wchar_t(-1) > wchar_t(0)));
+SA_TEST_CATEGORY(__is_unsigned, short, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned short, true);
+SA_TEST_CATEGORY(__is_unsigned, int, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned int, true);
+SA_TEST_CATEGORY(__is_unsigned, long, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned long, true);
+SA_TEST_CATEGORY(__is_unsigned, long long, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned long long, true);
+
+SA_TEST_CATEGORY(__is_unsigned, float, false);
+SA_TEST_CATEGORY(__is_unsigned, double, false);
+SA_TEST_CATEGORY(__is_unsigned, long double, false);
+
+#ifndef __STRICT_ANSI__
+// GNU Extensions.
+#ifdef __SIZEOF_INT128__
+SA_TEST_CATEGORY(__is_unsigned, unsigned __int128, true);
+SA_TEST_CATEGORY(__is_unsigned, __int128, false);
+#endif
+
+#ifdef _GLIBCXX_USE_FLOAT128
+SA_TEST_CATEGORY(__is_unsigned, __float128, false);
+#endif
+#endif
+
+// Sanity check.

[PATCH v2 2/2] libstdc++: use new built-in trait __remove_pointer

2023-07-07 Thread Ken Matsui via Gcc-patches
This patch lets libstdc++ use new built-in trait __remove_pointer.

libstdc++-v3/ChangeLog:

* include/std/type_traits (remove_pointer): Use __remove_pointer 
built-in trait.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 0e7a9c9c7f3..81497e2f3e1 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -2023,6 +2023,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   // Pointer modifications.
 
+  /// remove_pointer
+#if __has_builtin(__remove_pointer)
+  template<typename _Tp>
+struct remove_pointer
+{ using type = __remove_pointer(_Tp); };
+#else
   template<typename _Tp, typename>
 struct __remove_pointer_helper
 { using type = _Tp; };
@@ -2031,11 +2037,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct __remove_pointer_helper<_Tp, _Up*>
 { using type = _Up; };
 
-  /// remove_pointer
   template<typename _Tp>
 struct remove_pointer
 : public __remove_pointer_helper<_Tp, __remove_cv_t<_Tp>>
 { };
+#endif
 
   template
 struct __add_pointer_helper
-- 
2.41.0



[PATCH v2 1/2] c++: implement __remove_pointer built-in trait

2023-07-07 Thread Ken Matsui via Gcc-patches
This patch implements built-in trait for std::remove_pointer.

gcc/cp/ChangeLog:

* cp-trait.def: Define __remove_pointer.
* semantics.cc (finish_trait_type): Handle CPTK_REMOVE_POINTER.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __remove_pointer.
* g++.dg/ext/remove_pointer.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/cp-trait.def   |  1 +
 gcc/cp/semantics.cc   |  5 +++
 gcc/testsuite/g++.dg/ext/has-builtin-1.C  |  3 ++
 gcc/testsuite/g++.dg/ext/remove_pointer.C | 51 +++
 4 files changed, 60 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/remove_pointer.C

diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 8b7fece0cc8..07823e55579 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -90,6 +90,7 @@ DEFTRAIT_EXPR (IS_DEDUCIBLE, "__is_deducible ", 2)
 DEFTRAIT_TYPE (REMOVE_CV, "__remove_cv", 1)
 DEFTRAIT_TYPE (REMOVE_REFERENCE, "__remove_reference", 1)
 DEFTRAIT_TYPE (REMOVE_CVREF, "__remove_cvref", 1)
+DEFTRAIT_TYPE (REMOVE_POINTER, "__remove_pointer", 1)
 DEFTRAIT_TYPE (UNDERLYING_TYPE,  "__underlying_type", 1)
 DEFTRAIT_TYPE (TYPE_PACK_ELEMENT, "__type_pack_element", -1)
 
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 8fb47fd179e..a3b283ce938 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12374,6 +12374,11 @@ finish_trait_type (cp_trait_kind kind, tree type1, 
tree type2,
type1 = TREE_TYPE (type1);
   return cv_unqualified (type1);
 
+case CPTK_REMOVE_POINTER:
+  if (TYPE_PTR_P (type1))
+type1 = TREE_TYPE (type1);
+  return type1;
+
 case CPTK_TYPE_PACK_ELEMENT:
   return finish_type_pack_element (type1, type2, complain);
 
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index f343e153e56..e21e0a95509 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -146,3 +146,6 @@
 #if !__has_builtin (__remove_cvref)
 # error "__has_builtin (__remove_cvref) failed"
 #endif
+#if !__has_builtin (__remove_pointer)
+# error "__has_builtin (__remove_pointer) failed"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/remove_pointer.C 
b/gcc/testsuite/g++.dg/ext/remove_pointer.C
new file mode 100644
index 000..7b13db93950
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/remove_pointer.C
@@ -0,0 +1,51 @@
+// { dg-do compile { target c++11 } }
+
+#define SA(X) static_assert((X),#X)
+
+SA(__is_same(__remove_pointer(int), int));
+SA(__is_same(__remove_pointer(int*), int));
+SA(__is_same(__remove_pointer(int**), int*));
+
+SA(__is_same(__remove_pointer(const int*), const int));
+SA(__is_same(__remove_pointer(const int**), const int*));
+SA(__is_same(__remove_pointer(int* const), int));
+SA(__is_same(__remove_pointer(int** const), int*));
+SA(__is_same(__remove_pointer(int* const* const), int* const));
+
+SA(__is_same(__remove_pointer(volatile int*), volatile int));
+SA(__is_same(__remove_pointer(volatile int**), volatile int*));
+SA(__is_same(__remove_pointer(int* volatile), int));
+SA(__is_same(__remove_pointer(int** volatile), int*));
+SA(__is_same(__remove_pointer(int* volatile* volatile), int* volatile));
+
+SA(__is_same(__remove_pointer(const volatile int*), const volatile int));
+SA(__is_same(__remove_pointer(const volatile int**), const volatile int*));
+SA(__is_same(__remove_pointer(const int* volatile), const int));
+SA(__is_same(__remove_pointer(volatile int* const), volatile int));
+SA(__is_same(__remove_pointer(int* const volatile), int));
+SA(__is_same(__remove_pointer(const int** volatile), const int*));
+SA(__is_same(__remove_pointer(volatile int** const), volatile int*));
+SA(__is_same(__remove_pointer(int** const volatile), int*));
+SA(__is_same(__remove_pointer(int* const* const volatile), int* const));
+SA(__is_same(__remove_pointer(int* volatile* const volatile), int* volatile));
+SA(__is_same(__remove_pointer(int* const volatile* const volatile), int* const 
volatile));
+
+SA(__is_same(__remove_pointer(int&), int&));
+SA(__is_same(__remove_pointer(const int&), const int&));
+SA(__is_same(__remove_pointer(volatile int&), volatile int&));
+SA(__is_same(__remove_pointer(const volatile int&), const volatile int&));
+
+SA(__is_same(__remove_pointer(int&&), int&&));
+SA(__is_same(__remove_pointer(const int&&), const int&&));
+SA(__is_same(__remove_pointer(volatile int&&), volatile int&&));
+SA(__is_same(__remove_pointer(const volatile int&&), const volatile int&&));
+
+SA(__is_same(__remove_pointer(int[3]), int[3]));
+SA(__is_same(__remove_pointer(const int[3]), const int[3]));
+SA(__is_same(__remove_pointer(volatile int[3]), volatile int[3]));
+SA(__is_same(__remove_pointer(const volatile int[3]), const volatile int[3]));
+
+SA(__is_same(__remove_pointer(int(int)), int(int)));
+SA(__is_same(__remove_pointer(int(*const)(int)), int(int)));

[PATCH v3 2/2] libstdc++: use new built-in trait __is_const

2023-07-07 Thread Ken Matsui via Gcc-patches
This patch lets libstdc++ use new built-in trait __is_const.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_const): Use __is_const built-in trait.
(is_const_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 0e7a9c9c7f3..3a46eca5377 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -764,6 +764,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // Type properties.
 
   /// is_const
+#if __has_builtin(__is_const)
+  template<typename _Tp>
+struct is_const
+: public __bool_constant<__is_const(_Tp)>
+{ };
+#else
   template<typename>
 struct is_const
 : public false_type { };
@@ -771,6 +777,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _Tp>
 struct is_const<_Tp const>
 : public true_type { };
+#endif
 
   /// is_volatile
   template<typename>
@@ -3210,10 +3217,17 @@ template <typename _Tp>
   inline constexpr bool is_compound_v = is_compound<_Tp>::value;
 template <typename _Tp>
   inline constexpr bool is_member_pointer_v = is_member_pointer<_Tp>::value;
+
+#if __has_builtin(__is_const)
+template <typename _Tp>
+  inline constexpr bool is_const_v = __is_const(_Tp);
+#else
 template <typename _Tp>
   inline constexpr bool is_const_v = false;
 template <typename _Tp>
   inline constexpr bool is_const_v<const _Tp> = true;
+#endif
+
 template <typename _Tp>
   inline constexpr bool is_volatile_v = false;
 template <typename _Tp>
-- 
2.41.0



[PATCH v3 1/2] c++: implement __is_const built-in trait

2023-07-07 Thread Ken Matsui via Gcc-patches
This patch implements built-in trait for std::is_const.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_const.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_CONST.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_const.
* g++.dg/ext/is_const.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |  3 +++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  4 
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 +++
 gcc/testsuite/g++.dg/ext/is_const.C  | 19 +++
 5 files changed, 30 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_const.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 8cf0f2d0974..ff4ae831def 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3751,6 +3751,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_UNION:
   inform (loc, "  %qT is not a union", t1);
   break;
+case CPTK_IS_CONST:
+  inform (loc, "  %qT is not a const type", t1);
+  break;
 case CPTK_IS_AGGREGATE:
   inform (loc, "  %qT is not an aggregate", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 8b7fece0cc8..b40b475b86d 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -82,6 +82,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE, 
"__is_trivially_assignable", 2)
 DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, "__is_trivially_constructible", -1)
 DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
 DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
+DEFTRAIT_EXPR (IS_CONST, "__is_const", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 /* FIXME Added space to avoid direct usage in GCC 13.  */
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 8fb47fd179e..011ba8e46e1 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12079,6 +12079,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_ENUM:
   return type_code1 == ENUMERAL_TYPE;
 
+case CPTK_IS_CONST:
+  return CP_TYPE_CONST_P (type1);
+
 case CPTK_IS_FINAL:
   return CLASS_TYPE_P (type1) && CLASSTYPE_FINAL (type1);
 
@@ -12296,6 +12299,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_ENUM:
 case CPTK_IS_UNION:
 case CPTK_IS_SAME:
+case CPTK_IS_CONST:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index f343e153e56..965309a333a 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -146,3 +146,6 @@
 #if !__has_builtin (__remove_cvref)
 # error "__has_builtin (__remove_cvref) failed"
 #endif
+#if !__has_builtin (__is_const)
+# error "__has_builtin (__is_const) failed"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/is_const.C 
b/gcc/testsuite/g++.dg/ext/is_const.C
new file mode 100644
index 000..8f2d7c2fce9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_const.C
@@ -0,0 +1,19 @@
+// { dg-do compile { target c++11 } }
+
+#include <testsuite_tr1.h>
+
+using namespace __gnu_test;
+
+#define SA(X) static_assert((X),#X)
+
+// Positive tests.
+SA(__is_const(const int));
+SA(__is_const(const volatile int));
+SA(__is_const(cClassType));
+SA(__is_const(cvClassType));
+
+// Negative tests.
+SA(!__is_const(int));
+SA(!__is_const(volatile int));
+SA(!__is_const(ClassType));
+SA(!__is_const(vClassType));
-- 
2.41.0



[PATCH v3 2/2] libstdc++: use new built-in trait __is_array

2023-07-07 Thread Ken Matsui via Gcc-patches
This patch lets libstdc++ use new built-in trait __is_array.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_array): Use __is_array built-in trait.
(is_array_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 12 
 1 file changed, 12 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 0e7a9c9c7f3..f2a3a327e7d 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -503,6 +503,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { };
 
   /// is_array
+#if __has_builtin(__is_array)
+  template<typename _Tp>
+struct is_array
+: public __bool_constant<__is_array(_Tp)>
+{ };
+#else
   template<typename>
 struct is_array
 : public false_type { };
@@ -514,6 +520,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _Tp>
 struct is_array<_Tp[]>
 : public true_type { };
+#endif
 
   template<typename>
 struct __is_pointer_helper
@@ -3161,12 +3168,17 @@ template <typename _Tp>
 template <typename _Tp>
   inline constexpr bool is_floating_point_v = is_floating_point<_Tp>::value;
 
+#if __has_builtin(__is_array)
+template <typename _Tp>
+  inline constexpr bool is_array_v = __is_array(_Tp);
+#else
 template <typename _Tp>
   inline constexpr bool is_array_v = false;
 template <typename _Tp>
   inline constexpr bool is_array_v<_Tp[]> = true;
 template <typename _Tp, size_t _Num>
   inline constexpr bool is_array_v<_Tp[_Num]> = true;
+#endif
 
 template <typename _Tp>
   inline constexpr bool is_pointer_v = is_pointer<_Tp>::value;
-- 
2.41.0



[PATCH v3 1/2] c++: implement __is_array built-in trait

2023-07-07 Thread Ken Matsui via Gcc-patches
This patch implements built-in trait for std::is_array.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_array.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_ARRAY.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_array.
* g++.dg/ext/is_array.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |  3 +++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  4 
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 +++
 gcc/testsuite/g++.dg/ext/is_array.C  | 28 
 5 files changed, 39 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_array.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 8cf0f2d0974..7cec7eba591 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3751,6 +3751,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_UNION:
   inform (loc, "  %qT is not a union", t1);
   break;
+case CPTK_IS_ARRAY:
+  inform (loc, "  %qT is not an array", t1);
+  break;
 case CPTK_IS_AGGREGATE:
   inform (loc, "  %qT is not an aggregate", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 8b7fece0cc8..f68c7f2e8ec 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -82,6 +82,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE, 
"__is_trivially_assignable", 2)
 DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, "__is_trivially_constructible", -1)
 DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
 DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
+DEFTRAIT_EXPR (IS_ARRAY, "__is_array", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 /* FIXME Added space to avoid direct usage in GCC 13.  */
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 8fb47fd179e..22f2700ec0b 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12118,6 +12118,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_UNION:
   return type_code1 == UNION_TYPE;
 
+case CPTK_IS_ARRAY:
+  return type_code1 == ARRAY_TYPE;
+
 case CPTK_IS_ASSIGNABLE:
   return is_xible (MODIFY_EXPR, type1, type2);
 
@@ -12296,6 +12299,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_ENUM:
 case CPTK_IS_UNION:
 case CPTK_IS_SAME:
+case CPTK_IS_ARRAY:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index f343e153e56..56485ae62be 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -146,3 +146,6 @@
 #if !__has_builtin (__remove_cvref)
 # error "__has_builtin (__remove_cvref) failed"
 #endif
+#if !__has_builtin (__is_array)
+# error "__has_builtin (__is_array) failed"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/is_array.C 
b/gcc/testsuite/g++.dg/ext/is_array.C
new file mode 100644
index 000..facfed5c7cb
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_array.C
@@ -0,0 +1,28 @@
+// { dg-do compile { target c++11 } }
+
+#include <testsuite_tr1.h>
+
+using namespace __gnu_test;
+
+#define SA(X) static_assert((X),#X)
+#define SA_TEST_CATEGORY(TRAIT, X, expect) \
+  SA(TRAIT(X) == expect);  \
+  SA(TRAIT(const X) == expect);\
+  SA(TRAIT(volatile X) == expect); \
+  SA(TRAIT(const volatile X) == expect)
+
+SA_TEST_CATEGORY(__is_array, int[2], true);
+SA_TEST_CATEGORY(__is_array, int[], true);
+SA_TEST_CATEGORY(__is_array, int[2][3], true);
+SA_TEST_CATEGORY(__is_array, int[][3], true);
+SA_TEST_CATEGORY(__is_array, float*[2], true);
+SA_TEST_CATEGORY(__is_array, float*[], true);
+SA_TEST_CATEGORY(__is_array, float*[2][3], true);
+SA_TEST_CATEGORY(__is_array, float*[][3], true);
+SA_TEST_CATEGORY(__is_array, ClassType[2], true);
+SA_TEST_CATEGORY(__is_array, ClassType[], true);
+SA_TEST_CATEGORY(__is_array, ClassType[2][3], true);
+SA_TEST_CATEGORY(__is_array, ClassType[][3], true);
+
+// Sanity check.
+SA_TEST_CATEGORY(__is_array, ClassType, false);
-- 
2.41.0



[PATCH v3 2/2] libstdc++: use new built-in trait __is_volatile

2023-07-07 Thread Ken Matsui via Gcc-patches
This patch lets libstdc++ use new built-in trait __is_volatile.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_volatile): Use __is_volatile built-in
trait.
(is_volatile_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 13 +
 1 file changed, 13 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 0e7a9c9c7f3..db74b884b35 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -773,6 +773,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 : public true_type { };
 
   /// is_volatile
+#if __has_builtin(__is_volatile)
+  template<typename _Tp>
+struct is_volatile
+: public __bool_constant<__is_volatile(_Tp)>
+{ };
+#else
   template<typename>
 struct is_volatile
 : public false_type { };
@@ -780,6 +786,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _Tp>
 struct is_volatile<_Tp volatile>
 : public true_type { };
+#endif
 
   /// is_trivial
   template<typename _Tp>
@@ -3214,10 +3221,16 @@ template <typename _Tp>
   inline constexpr bool is_const_v = false;
 template <typename _Tp>
   inline constexpr bool is_const_v<const _Tp> = true;
+
+#if __has_builtin(__is_volatile)
+template <typename _Tp>
+  inline constexpr bool is_volatile_v = __is_volatile(_Tp);
+#else
 template <typename _Tp>
   inline constexpr bool is_volatile_v = false;
 template <typename _Tp>
   inline constexpr bool is_volatile_v<volatile _Tp> = true;
+#endif
 
 template <typename _Tp>
   inline constexpr bool is_trivial_v = __is_trivial(_Tp);
-- 
2.41.0



[PATCH v3 1/2] c++: implement __is_volatile built-in trait

2023-07-07 Thread Ken Matsui via Gcc-patches
This patch implements built-in trait for std::is_volatile.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_volatile.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_VOLATILE.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_volatile.
* g++.dg/ext/is_volatile.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |  3 +++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  4 
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 +++
 gcc/testsuite/g++.dg/ext/is_volatile.C   | 19 +++
 5 files changed, 30 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_volatile.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 8cf0f2d0974..e971d67ee25 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3751,6 +3751,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_UNION:
   inform (loc, "  %qT is not a union", t1);
   break;
+case CPTK_IS_VOLATILE:
+  inform (loc, "  %qT is not a volatile type", t1);
+  break;
 case CPTK_IS_AGGREGATE:
   inform (loc, "  %qT is not an aggregate", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 8b7fece0cc8..414b1065a11 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -82,6 +82,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE, 
"__is_trivially_assignable", 2)
 DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, "__is_trivially_constructible", -1)
 DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
 DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
+DEFTRAIT_EXPR (IS_VOLATILE, "__is_volatile", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 /* FIXME Added space to avoid direct usage in GCC 13.  */
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 8fb47fd179e..10934d01504 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12079,6 +12079,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_ENUM:
   return type_code1 == ENUMERAL_TYPE;
 
+case CPTK_IS_VOLATILE:
+  return CP_TYPE_VOLATILE_P (type1);
+
 case CPTK_IS_FINAL:
   return CLASS_TYPE_P (type1) && CLASSTYPE_FINAL (type1);
 
@@ -12296,6 +12299,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_ENUM:
 case CPTK_IS_UNION:
 case CPTK_IS_SAME:
+case CPTK_IS_VOLATILE:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index f343e153e56..7ad640f141b 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -146,3 +146,6 @@
 #if !__has_builtin (__remove_cvref)
 # error "__has_builtin (__remove_cvref) failed"
 #endif
+#if !__has_builtin (__is_volatile)
+# error "__has_builtin (__is_volatile) failed"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/is_volatile.C 
b/gcc/testsuite/g++.dg/ext/is_volatile.C
new file mode 100644
index 000..004e397e5e7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_volatile.C
@@ -0,0 +1,19 @@
+// { dg-do compile { target c++11 } }
+
+#include <testsuite_tr1.h>
+
+using namespace __gnu_test;
+
+#define SA(X) static_assert((X),#X)
+
+// Positive tests.
+SA(__is_volatile(volatile int));
+SA(__is_volatile(const volatile int));
+SA(__is_volatile(vClassType));
+SA(__is_volatile(cvClassType));
+
+// Negative tests.
+SA(!__is_volatile(int));
+SA(!__is_volatile(const int));
+SA(!__is_volatile(ClassType));
+SA(!__is_volatile(cClassType));
-- 
2.41.0



[PATCH v8 5/6] c++, libstdc++: implement __is_void built-in trait

2023-07-07 Thread Ken Matsui via Gcc-patches
This patch implements built-in trait for std::is_void. Since the new built-in
name is __is_void, to avoid unintentional macro replacement, this patch also
involves the removal of the existing __is_void in helper_functions.h and
cpp_type_traits.h and renaming __is_void to is_void in the test file,
pr46567.C.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_void.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_VOID.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/tm/pr46567.C (__is_void): Rename to ...
(is_void): ... this.
* g++.dg/ext/has-builtin-1.C: Test existence of __is_void.
* g++.dg/ext/is_void.C: New test.

libstdc++-v3/ChangeLog:

* include/debug/helper_functions.h (_DiffTraits): Stop using
__is_void.
* include/bits/cpp_type_traits.h (__is_void): Remove unused __is_void.
* include/std/type_traits (is_void_v): Use __is_void built-in
trait.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc  |  3 ++
 gcc/cp/cp-trait.def   |  1 +
 gcc/cp/semantics.cc   |  4 +++
 gcc/testsuite/g++.dg/ext/has-builtin-1.C  |  3 ++
 gcc/testsuite/g++.dg/ext/is_void.C| 35 +++
 gcc/testsuite/g++.dg/tm/pr46567.C |  6 ++--
 libstdc++-v3/include/bits/cpp_type_traits.h   | 15 
 libstdc++-v3/include/debug/helper_functions.h |  5 ++-
 libstdc++-v3/include/std/type_traits  |  6 
 9 files changed, 57 insertions(+), 21 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_void.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 927605c6cb7..e8cd98eb2c7 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3757,6 +3757,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_FUNCTION:
   inform (loc, "  %qT is not a function", t1);
   break;
+case CPTK_IS_VOID:
+  inform (loc, "  %qT is not a void type", t1);
+  break;
 case CPTK_IS_AGGREGATE:
   inform (loc, "  %qT is not an aggregate", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 3cd3babc242..8e76668f6ed 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -84,6 +84,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, 
"__is_trivially_constructible", -1)
 DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
 DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
 DEFTRAIT_EXPR (IS_FUNCTION, "__is_function", 1)
+DEFTRAIT_EXPR (IS_VOID, "__is_void", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 /* FIXME Added space to avoid direct usage in GCC 13.  */
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index b976633645a..c4d44413dce 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12075,6 +12075,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_FUNCTION:
   return type_code1 == FUNCTION_TYPE;
 
+case CPTK_IS_VOID:
+  return VOID_TYPE_P (type1);
+
 case CPTK_IS_FINAL:
   return CLASS_TYPE_P (type1) && CLASSTYPE_FINAL (type1);
 
@@ -12297,6 +12300,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_SAME:
 case CPTK_IS_REFERENCE:
 case CPTK_IS_FUNCTION:
+case CPTK_IS_VOID:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index 90eb00ebf2d..b96cc9e6f50 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -152,3 +152,6 @@
 #if !__has_builtin (__is_function)
 # error "__has_builtin (__is_function) failed"
 #endif
+#if !__has_builtin (__is_void)
+# error "__has_builtin (__is_void) failed"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/is_void.C 
b/gcc/testsuite/g++.dg/ext/is_void.C
new file mode 100644
index 000..707f0d6875b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_void.C
@@ -0,0 +1,35 @@
+// { dg-do compile { target c++11 } }
+
+#include <testsuite_tr1.h>
+
+using namespace __gnu_test;
+
+#define SA(X) static_assert((X),#X)
+#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT)  \
+  SA(TRAIT(TYPE) == EXPECT);   \
+  SA(TRAIT(const TYPE) == EXPECT); \
+  SA(TRAIT(volatile TYPE) == EXPECT);  \
+  SA(TRAIT(const volatile TYPE) == EXPECT)
+
+SA_TEST_CATEGORY(__is_void, void, true);
+
+SA_TEST_CATEGORY(__is_void, char, false);
+SA_TEST_CATEGORY(__is_void, signed char, false);
+SA_TEST_CATEGORY(__is_void, unsigned char, false);
+SA_TEST_CATEGORY(__is_void, wchar_t, false);
+SA_TEST_CATEGORY(__is_void, short, false);
+SA_TEST_CATEGORY(__is_void, unsigned short, false);

[PATCH v8 6/6] libstdc++: make std::is_object dispatch to new built-in traits

2023-07-07 Thread Ken Matsui via Gcc-patches
This patch gets std::is_object to dispatch to new built-in traits,
__is_function, __is_reference, and __is_void.

libstdc++-v3/ChangeLog:
* include/std/type_traits (is_object): Use new built-in traits,
__is_function, __is_reference, and __is_void.
(is_object_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 780fcc00135..715310f10bf 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -682,11 +682,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { };
 
   /// is_object
+#if __has_builtin(__is_function) && __has_builtin(__is_reference) \
+&& __has_builtin(__is_void)
+  template
+struct is_object
+: public __bool_constant
+{ };
+#else
   template
 struct is_object
 : public __not_<__or_, is_reference<_Tp>,
   is_void<_Tp>>>::type
 { };
+#endif
 
   template
 struct is_member_pointer;
@@ -3235,8 +3244,17 @@ template 
   inline constexpr bool is_arithmetic_v = is_arithmetic<_Tp>::value;
 template 
   inline constexpr bool is_fundamental_v = is_fundamental<_Tp>::value;
+
+#if __has_builtin(__is_function) && __has_builtin(__is_reference) \
+&& __has_builtin(__is_void)
+template 
+  inline constexpr bool is_object_v
+= !(__is_function(_Tp) || __is_reference(_Tp) || __is_void(_Tp));
+#else
 template 
   inline constexpr bool is_object_v = is_object<_Tp>::value;
+#endif
+
 template 
   inline constexpr bool is_scalar_v = is_scalar<_Tp>::value;
 template 
-- 
2.41.0



[PATCH v8 3/6] c++: implement __is_function built-in trait

2023-07-07 Thread Ken Matsui via Gcc-patches
This patch implements built-in trait for std::is_function.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_function.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_FUNCTION.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_function.
* g++.dg/ext/is_function.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |  3 ++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  4 ++
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 ++
 gcc/testsuite/g++.dg/ext/is_function.C   | 58 
 5 files changed, 69 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_function.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index f6951ee2670..927605c6cb7 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3754,6 +3754,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_UNION:
   inform (loc, "  %qT is not a union", t1);
   break;
+case CPTK_IS_FUNCTION:
+  inform (loc, "  %qT is not a function", t1);
+  break;
 case CPTK_IS_AGGREGATE:
   inform (loc, "  %qT is not an aggregate", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 1e3310cd682..3cd3babc242 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -83,6 +83,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE, 
"__is_trivially_assignable", 2)
 DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, "__is_trivially_constructible", -1)
 DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
 DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
+DEFTRAIT_EXPR (IS_FUNCTION, "__is_function", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 /* FIXME Added space to avoid direct usage in GCC 13.  */
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 2f37bc353a1..b976633645a 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12072,6 +12072,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_ENUM:
   return type_code1 == ENUMERAL_TYPE;
 
+case CPTK_IS_FUNCTION:
+  return type_code1 == FUNCTION_TYPE;
+
 case CPTK_IS_FINAL:
   return CLASS_TYPE_P (type1) && CLASSTYPE_FINAL (type1);
 
@@ -12293,6 +12296,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_UNION:
 case CPTK_IS_SAME:
 case CPTK_IS_REFERENCE:
+case CPTK_IS_FUNCTION:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index b697673790c..90eb00ebf2d 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -149,3 +149,6 @@
 #if !__has_builtin (__is_reference)
 # error "__has_builtin (__is_reference) failed"
 #endif
+#if !__has_builtin (__is_function)
+# error "__has_builtin (__is_function) failed"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/is_function.C 
b/gcc/testsuite/g++.dg/ext/is_function.C
new file mode 100644
index 000..2e1594b12ad
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_function.C
@@ -0,0 +1,58 @@
+// { dg-do compile { target c++11 } }
+
+#include 
+
+using namespace __gnu_test;
+
+#define SA(X) static_assert((X),#X)
+#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT)  \
+  SA(TRAIT(TYPE) == EXPECT);   \
+  SA(TRAIT(const TYPE) == EXPECT); \
+  SA(TRAIT(volatile TYPE) == EXPECT);  \
+  SA(TRAIT(const volatile TYPE) == EXPECT)
+
+struct A
+{ void fn(); };
+
+template
+struct AHolder { };
+
+template
+struct AHolder
+{ using type = U; };
+
+// Positive tests.
+SA(__is_function(int (int)));
+SA(__is_function(ClassType (ClassType)));
+SA(__is_function(float (int, float, int[], int&)));
+SA(__is_function(int (int, ...)));
+SA(__is_function(bool (ClassType) const));
+SA(__is_function(AHolder::type));
+
+void fn();
+SA(__is_function(decltype(fn)));
+
+// Negative tests.
+SA_TEST_CATEGORY(__is_function, int, false);
+SA_TEST_CATEGORY(__is_function, int*, false);
+SA_TEST_CATEGORY(__is_function, int&, false);
+SA_TEST_CATEGORY(__is_function, void, false);
+SA_TEST_CATEGORY(__is_function, void*, false);
+SA_TEST_CATEGORY(__is_function, void**, false);
+SA_TEST_CATEGORY(__is_function, std::nullptr_t, false);
+
+SA_TEST_CATEGORY(__is_function, AbstractClass, false);
+SA(!__is_function(int(&)(int)));
+SA(!__is_function(int(*)(int)));
+
+SA_TEST_CATEGORY(__is_function, A, false);
+SA_TEST_CATEGORY(__is_function, decltype(::fn), false);
+
+struct FnCallOverload
+{ void operator()(); };
+SA_TEST_CATEGORY(__is_function, FnCallOverload, false);
+
+// Sanity check.

[PATCH v8 4/6] libstdc++: use new built-in trait __is_function for std::is_function

2023-07-07 Thread Ken Matsui via Gcc-patches
This patch gets std::is_function to dispatch to new built-in trait
__is_function.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_function): Use __is_function built-in
trait.
(is_function_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 13 +
 1 file changed, 13 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 2a14df7e5f9..954b57518de 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -594,6 +594,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { };
 
   /// is_function
+#if __has_builtin(__is_function)
+  template
+struct is_function
+: public __bool_constant<__is_function(_Tp)>
+{ };
+#else
   template
 struct is_function
 : public __bool_constant::value> { };
@@ -605,6 +611,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template
 struct is_function<_Tp&&>
 : public false_type { };
+#endif
 
 #define __cpp_lib_is_null_pointer 201309L
 
@@ -3197,8 +3204,14 @@ template 
   inline constexpr bool is_union_v = __is_union(_Tp);
 template 
   inline constexpr bool is_class_v = __is_class(_Tp);
+
+#if __has_builtin(__is_function)
+template 
+  inline constexpr bool is_function_v = __is_function(_Tp);
+#else
 template 
   inline constexpr bool is_function_v = is_function<_Tp>::value;
+#endif
 
 #if __has_builtin(__is_reference)
 template 
-- 
2.41.0



[PATCH v8 1/6] c++: implement __is_reference built-in trait

2023-07-07 Thread Ken Matsui via Gcc-patches
This patch implements built-in trait for std::is_reference.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_reference.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_REFERENCE.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_reference.
* g++.dg/ext/is_reference.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |  3 +++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  4 +++
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 +++
 gcc/testsuite/g++.dg/ext/is_reference.C  | 34 
 5 files changed, 45 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_reference.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 8cf0f2d0974..f6951ee2670 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3705,6 +3705,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_HAS_VIRTUAL_DESTRUCTOR:
   inform (loc, "  %qT does not have a virtual destructor", t1);
   break;
+case CPTK_IS_REFERENCE:
+  inform (loc, "  %qT is not a reference", t1);
+  break;
 case CPTK_IS_ABSTRACT:
   inform (loc, "  %qT is not an abstract class", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 8b7fece0cc8..1e3310cd682 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -67,6 +67,7 @@ DEFTRAIT_EXPR (IS_CONVERTIBLE, "__is_convertible", 2)
 DEFTRAIT_EXPR (IS_EMPTY, "__is_empty", 1)
 DEFTRAIT_EXPR (IS_ENUM, "__is_enum", 1)
 DEFTRAIT_EXPR (IS_FINAL, "__is_final", 1)
+DEFTRAIT_EXPR (IS_REFERENCE, "__is_reference", 1)
 DEFTRAIT_EXPR (IS_LAYOUT_COMPATIBLE, "__is_layout_compatible", 2)
 DEFTRAIT_EXPR (IS_LITERAL_TYPE, "__is_literal_type", 1)
 DEFTRAIT_EXPR (IS_NOTHROW_ASSIGNABLE, "__is_nothrow_assignable", 2)
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index a2e74a5d2c7..2f37bc353a1 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12075,6 +12075,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_FINAL:
   return CLASS_TYPE_P (type1) && CLASSTYPE_FINAL (type1);
 
+case CPTK_IS_REFERENCE:
+  return type_code1 == REFERENCE_TYPE;
+
 case CPTK_IS_LAYOUT_COMPATIBLE:
   return layout_compatible_type_p (type1, type2);
 
@@ -12289,6 +12292,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_ENUM:
 case CPTK_IS_UNION:
 case CPTK_IS_SAME:
+case CPTK_IS_REFERENCE:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index f343e153e56..b697673790c 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -146,3 +146,6 @@
 #if !__has_builtin (__remove_cvref)
 # error "__has_builtin (__remove_cvref) failed"
 #endif
+#if !__has_builtin (__is_reference)
+# error "__has_builtin (__is_reference) failed"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/is_reference.C 
b/gcc/testsuite/g++.dg/ext/is_reference.C
new file mode 100644
index 000..b5ce4db7afd
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_reference.C
@@ -0,0 +1,34 @@
+// { dg-do compile { target c++11 } }
+
+#include 
+
+using namespace __gnu_test;
+
+#define SA(X) static_assert((X),#X)
+#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT)  \
+  SA(TRAIT(TYPE) == EXPECT);   \
+  SA(TRAIT(const TYPE) == EXPECT); \
+  SA(TRAIT(volatile TYPE) == EXPECT);  \
+  SA(TRAIT(const volatile TYPE) == EXPECT)
+
+// Positive tests.
+SA_TEST_CATEGORY(__is_reference, int&, true);
+SA_TEST_CATEGORY(__is_reference, ClassType&, true);
+SA(__is_reference(int(&)(int)));
+SA_TEST_CATEGORY(__is_reference, int&&, true);
+SA_TEST_CATEGORY(__is_reference, ClassType&&, true);
+SA(__is_reference(int(&&)(int)));
+SA_TEST_CATEGORY(__is_reference, IncompleteClass&, true);
+
+// Negative tests
+SA_TEST_CATEGORY(__is_reference, void, false);
+SA_TEST_CATEGORY(__is_reference, int*, false);
+SA_TEST_CATEGORY(__is_reference, int[3], false);
+SA(!__is_reference(int(int)));
+SA(!__is_reference(int(*const)(int)));
+SA(!__is_reference(int(*volatile)(int)));
+SA(!__is_reference(int(*const volatile)(int)));
+
+// Sanity check.
+SA_TEST_CATEGORY(__is_reference, ClassType, false);
+SA_TEST_CATEGORY(__is_reference, IncompleteClass, false);
-- 
2.41.0



[PATCH v8 2/6] libstdc++: use new built-in trait __is_reference for std::is_reference

2023-07-07 Thread Ken Matsui via Gcc-patches
This patch gets std::is_reference to dispatch to new built-in trait
__is_reference.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_reference): Use __is_reference built-in
trait.
(is_reference_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 0e7a9c9c7f3..2a14df7e5f9 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -639,6 +639,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // Composite type categories.
 
   /// is_reference
+#if __has_builtin(__is_reference)
+  template
+struct is_reference
+: public __bool_constant<__is_reference(_Tp)>
+{ };
+#else
   template
 struct is_reference
 : public false_type
@@ -653,6 +659,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct is_reference<_Tp&&>
 : public true_type
 { };
+#endif
 
   /// is_arithmetic
   template
@@ -3192,12 +3199,19 @@ template 
   inline constexpr bool is_class_v = __is_class(_Tp);
 template 
   inline constexpr bool is_function_v = is_function<_Tp>::value;
+
+#if __has_builtin(__is_reference)
+template 
+  inline constexpr bool is_reference_v = __is_reference(_Tp);
+#else
 template 
   inline constexpr bool is_reference_v = false;
 template 
   inline constexpr bool is_reference_v<_Tp&> = true;
 template 
   inline constexpr bool is_reference_v<_Tp&&> = true;
+#endif
+
 template 
   inline constexpr bool is_arithmetic_v = is_arithmetic<_Tp>::value;
 template 
-- 
2.41.0



[PATCH v8 0/6] c++, libstdc++: get std::is_object to dispatch to new built-in traits

2023-07-07 Thread Ken Matsui via Gcc-patches
Hi,

This patch series gets std::is_object to dispatch to built-in traits and
implements the following built-in traits, on which std::is_object depends.

* __is_reference
* __is_function
* __is_void

std::is_object was depending on them with disjunction and negation.

__not_<__or_<is_function<_Tp>, is_reference<_Tp>, is_void<_Tp>>>::type

Therefore, this patch uses them directly instead of implementing an additional
built-in trait __is_object, which makes the compiler slightly bigger and
slower.

__bool_constant<!(__is_function(_Tp) || __is_reference(_Tp) || __is_void(_Tp))>

This would instantiate only __bool_constant<true> and __bool_constant<false>,
which can be mostly shared. That is, the purpose of built-in traits is
considered as achieved.

Changes in v8

* Updated the commit author to kmat...@gcc.gnu.org considering the
expiration of my university's email address.

Ken Matsui (6):
  c++: implement __is_reference built-in trait
  libstdc++: use new built-in trait __is_reference for std::is_reference
  c++: implement __is_function built-in trait
  libstdc++: use new built-in trait __is_function for std::is_function
  c++, libstdc++: implement __is_void built-in trait
  libstdc++: make std::is_object dispatch to new built-in traits

 gcc/cp/constraint.cc  |  9 +++
 gcc/cp/cp-trait.def   |  3 +
 gcc/cp/semantics.cc   | 12 
 gcc/testsuite/g++.dg/ext/has-builtin-1.C  |  9 +++
 gcc/testsuite/g++.dg/ext/is_function.C| 58 +++
 gcc/testsuite/g++.dg/ext/is_reference.C   | 34 +++
 gcc/testsuite/g++.dg/ext/is_void.C| 35 +++
 gcc/testsuite/g++.dg/tm/pr46567.C |  6 +-
 libstdc++-v3/include/bits/cpp_type_traits.h   | 15 -
 libstdc++-v3/include/debug/helper_functions.h |  5 +-
 libstdc++-v3/include/std/type_traits  | 51 
 11 files changed, 216 insertions(+), 21 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_function.C
 create mode 100644 gcc/testsuite/g++.dg/ext/is_reference.C
 create mode 100644 gcc/testsuite/g++.dg/ext/is_void.C

-- 
2.41.0



[PATCH v2] libstdc++: use __is_enum built-in trait

2023-07-07 Thread Ken Matsui via Gcc-patches
This patch replaces is_enum::value with __is_enum built-in trait in
the type_traits header.

libstdc++-v3/ChangeLog:

* include/std/type_traits (__make_unsigned_selector): Use
__is_enum built-in trait.
(__make_signed_selector): Likewise.
(__underlying_type_impl): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 0e7a9c9c7f3..9f086992ebc 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -1740,7 +1740,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // Select between integral and enum: not possible to be both.
   template::value,
-  bool _IsEnum = is_enum<_Tp>::value>
+  bool _IsEnum = __is_enum(_Tp)>
 class __make_unsigned_selector;
 
   template
@@ -1900,7 +1900,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // Select between integral and enum: not possible to be both.
   template::value,
-  bool _IsEnum = is_enum<_Tp>::value>
+  bool _IsEnum = __is_enum(_Tp)>
 class __make_signed_selector;
 
   template
@@ -2353,7 +2353,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct __common_type_fold<_CTp, _Rp, void>
 { };
 
-  template::value>
+  template
 struct __underlying_type_impl
 {
   using type = __underlying_type(_Tp);
-- 
2.41.0



[PATCH v2 2/2] libstdc++: use new built-in trait __is_scalar for std::is_scalar

2023-07-07 Thread Ken Matsui via Gcc-patches
This patch gets std::is_scalar to dispatch to new built-in trait
__is_scalar.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_scalar): Use __is_scalar built-in
trait.
(is_scalar_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 0e7a9c9c7f3..bc90b2c61ca 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -678,11 +678,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct is_member_pointer;
 
   /// is_scalar
+#if __has_builtin(__is_scalar)
+  template
+struct is_scalar
+: public __bool_constant<__is_scalar(_Tp)>
+{ };
+#else
   template
 struct is_scalar
 : public __or_, is_enum<_Tp>, is_pointer<_Tp>,
is_member_pointer<_Tp>, is_null_pointer<_Tp>>::type
 { };
+#endif
 
   /// is_compound
   template
@@ -3204,8 +3211,15 @@ template 
   inline constexpr bool is_fundamental_v = is_fundamental<_Tp>::value;
 template 
   inline constexpr bool is_object_v = is_object<_Tp>::value;
+
+#if __has_builtin(__is_scalar)
+template 
+  inline constexpr bool is_scalar_v = __is_scalar(_Tp);
+#else
 template 
   inline constexpr bool is_scalar_v = is_scalar<_Tp>::value;
+#endif
+
 template 
   inline constexpr bool is_compound_v = is_compound<_Tp>::value;
 template 
-- 
2.41.0



[PATCH v2 1/2] c++, libstdc++: implement __is_scalar built-in trait

2023-07-07 Thread Ken Matsui via Gcc-patches
This patch implements built-in trait for std::is_scalar. The existent
__is_scalar codes were replaced with is_scalar to avoid unintentional
macro replacement by the new built-in.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_scalar.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_SCALAR.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_scalar.
* g++.dg/ext/is_scalar.C: New test.
* g++.dg/tm/pr46567.C: Use is_scalar instead.
* g++.dg/torture/pr57107.C: Likewise.

libstdc++-v3/ChangeLog:

* include/bits/cpp_type_traits.h (__is_scalar): Rename to ...
(is_scalar): ... this.
* include/bits/stl_algobase.h: Use is_scalar instead.
* include/bits/valarray_array.h: Likewise.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc|  3 ++
 gcc/cp/cp-trait.def |  1 +
 gcc/cp/semantics.cc |  4 +++
 gcc/testsuite/g++.dg/ext/has-builtin-1.C|  3 ++
 gcc/testsuite/g++.dg/ext/is_scalar.C| 31 +
 gcc/testsuite/g++.dg/tm/pr46567.C   | 10 +++
 gcc/testsuite/g++.dg/torture/pr57107.C  |  4 +--
 libstdc++-v3/include/bits/cpp_type_traits.h |  2 +-
 libstdc++-v3/include/bits/stl_algobase.h|  8 +++---
 libstdc++-v3/include/bits/valarray_array.h  |  2 +-
 10 files changed, 55 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_scalar.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 8cf0f2d0974..4c27f2a3a62 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3751,6 +3751,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_UNION:
   inform (loc, "  %qT is not a union", t1);
   break;
+case CPTK_IS_SCALAR:
+  inform (loc, "  %qT is not a scalar type", t1);
+  break;
 case CPTK_IS_AGGREGATE:
   inform (loc, "  %qT is not an aggregate", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 8b7fece0cc8..59ae087c457 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -82,6 +82,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE, 
"__is_trivially_assignable", 2)
 DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, "__is_trivially_constructible", -1)
 DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
 DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
+DEFTRAIT_EXPR (IS_SCALAR, "__is_scalar", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 /* FIXME Added space to avoid direct usage in GCC 13.  */
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 8fb47fd179e..3edc7f23212 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12118,6 +12118,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_UNION:
   return type_code1 == UNION_TYPE;
 
+case CPTK_IS_SCALAR:
+  return SCALAR_TYPE_P (type1);
+
 case CPTK_IS_ASSIGNABLE:
   return is_xible (MODIFY_EXPR, type1, type2);
 
@@ -12296,6 +12299,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_ENUM:
 case CPTK_IS_UNION:
 case CPTK_IS_SAME:
+case CPTK_IS_SCALAR:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index f343e153e56..75acbdfb9fc 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -146,3 +146,6 @@
 #if !__has_builtin (__remove_cvref)
 # error "__has_builtin (__remove_cvref) failed"
 #endif
+#if !__has_builtin (__is_scalar)
+# error "__has_builtin (__is_scalar) failed"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/is_scalar.C 
b/gcc/testsuite/g++.dg/ext/is_scalar.C
new file mode 100644
index 000..457fddc52fc
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_scalar.C
@@ -0,0 +1,31 @@
+// { dg-do compile { target c++11 } }
+
+#include   // std::nullptr_t
+#include 
+
+using namespace __gnu_test;
+
+#define SA(X) static_assert((X),#X)
+
+#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT)  \
+  SA(TRAIT(TYPE) == EXPECT);   \
+  SA(TRAIT(const TYPE) == EXPECT); \
+  SA(TRAIT(volatile TYPE) == EXPECT);  \
+  SA(TRAIT(const volatile TYPE) == EXPECT)
+
+// volatile return type would cause a warning.
+#define SA_FN_TEST_CATEGORY(TRAIT, TYPE, EXPECT)   \
+  SA(TRAIT(TYPE) == EXPECT);   \
+  SA(TRAIT(const TYPE) == EXPECT)
+
+SA_TEST_CATEGORY(__is_scalar, int, true);
+SA_TEST_CATEGORY(__is_scalar, float, true);
+SA_TEST_CATEGORY(__is_scalar, EnumType, true);
+SA_TEST_CATEGORY(__is_scalar, int*, true);

Re: Re: [PATCH] RISC-V: Support vfwmul.vv combine lowering

2023-07-07 Thread 钟居哲
Sure. 

We can come back to see in the future which doesn't change this codegen quality:
https://godbolt.org/z/d6rWPTWeW 



juzhe.zh...@rivai.ai
 
From: Jeff Law
Date: 2023-07-08 05:11
To: juzhe.zh...@rivai.ai; Robin Dapp
CC: gcc-patches; kito.cheng; Kito.cheng; palmer; palmer
Subject: Re: [PATCH] RISC-V: Support vfwmul.vv combine lowering
 
 
On 7/3/23 02:42, juzhe.zh...@rivai.ai wrote:
> We failed to merge it since it's been rejected.
> https://patchwork.sourceware.org/project/gcc/patch/20230628041512.188243-1-juzhe.zh...@rivai.ai/
>  
> 
That was based on the belief that the bridging patterns should not be 
needed.  With the decision to move forward with those patterns this 
patch should be reconsidered.
 
jeff
 


Re: [PATCH] RISC-V: Support vfwmul.vv combine lowering

2023-07-07 Thread Jeff Law via Gcc-patches




On 7/3/23 02:42, juzhe.zh...@rivai.ai wrote:

We failed to merge it since it's been rejected.
https://patchwork.sourceware.org/project/gcc/patch/20230628041512.188243-1-juzhe.zh...@rivai.ai/
 

That was based on the belief that the bridging patterns should not be 
needed.  With the decision to move forward with those patterns this 
patch should be reconsidered.


jeff


[PATCH v5] rs6000: Update the vsx-vector-6.* tests.

2023-07-07 Thread Carl Love via Gcc-patches


GCC maintainers:

Ver 5. Removed -compile from the names of the compile only tests. Fixed
up the reference to the compile file names in the .h file headers. 
Replaced powerpc_vsx_ok with vsx_hw in the run test files.  Removed the
-save-temps from all files.  Retested on all of the various platforms
with no regressions.

Ver 4. Fixed a few typos.  Redid the tests to create separate run and
compile tests.

Ver 3.  Added __attribute__ ((noipa)) to the test files.  Changed some
of the scan-assembler-times checks to cover multiple similar
instructions.  Change the function check macro to a macro to generate a
function to do the test and check the results.  Retested on the various
processor types and BE/LE versions.

Ver 2.  Switched to using code macros to generate the call to the
builtin and test the results.  Added in instruction counts for the key
instruction for the builtin.  Moved the tests into an additional
function call to ensure the compile doesn't replace the builtin call
code with the statically computed results.  The compiler was doing this
for a few of the simpler tests.  

The following patch takes the tests in vsx-vector-6-p7.h,  vsx-vector-
6-p8.h, vsx-vector-6-p9.h and reorganizes them into a series of smaller
test files by functionality rather than processor version.

Tested the patch on Power 8 LE/BE, Power 9 LE/BE and Power 10 LE with
no regressions.

Please let me know if this patch is acceptable for mainline.  Thanks.

   Carl



-
rs6000: Update the vsx-vector-6.* tests.

The vsx-vector-6.h file is included into the processor specific test files
vsx-vector-6.p7.c, vsx-vector-6.p8.c, and vsx-vector-6.p9.c.  The .h file
contains a large number of vsx vector built-in tests.  The processor
specific files contain the number of instructions that the tests are
expected to generate for that processor.  The tests are compile only.

This patch reworks the tests into a series of files for related tests.
The new tests consist of a runnable test to verify the built-in argument
types and the functional correctness of each built-in.  There is also a
compile only test that verifies the built-ins generate the expected number
of instructions for the various built-in tests.

gcc/testsuite/
* gcc.target/powerpc/vsx-vector-6-func-1op.h: New test file.
* gcc.target/powerpc/vsx-vector-6-func-1op-run.c: New test file.
* gcc.target/powerpc/vsx-vector-6-func-1op.c: New test file.
* gcc.target/powerpc/vsx-vector-6-func-2lop.h: New test file.
* gcc.target/powerpc/vsx-vector-6-func-2lop-run.c: New test file.
* gcc.target/powerpc/vsx-vector-6-func-2lop.c: New test file.
* gcc.target/powerpc/vsx-vector-6-func-2op.h: New test file.
* gcc.target/powerpc/vsx-vector-6-func-2op-run.c: New test file.
* gcc.target/powerpc/vsx-vector-6-func-2op.c: New test file.
* gcc.target/powerpc/vsx-vector-6-func-3op.h: New test file.
* gcc.target/powerpc/vsx-vector-6-func-3op-run.c: New test file.
* gcc.target/powerpc/vsx-vector-6-func-3op.c: New test file.
* gcc.target/powerpc/vsx-vector-6-func-cmp-all.h: New test file.
* gcc.target/powerpc/vsx-vector-6-func-cmp-all-run.c: New test file.
* gcc.target/powerpc/vsx-vector-6-func-cmp-all.c: New test
file.
* gcc.target/powerpc/vsx-vector-6-func-cmp.h: New test file.
* gcc.target/powerpc/vsx-vector-6-func-cmp-run.c: New test file.
* gcc.target/powerpc/vsx-vector-6-func-cmp.c: New test file.
* gcc.target/powerpc/vsx-vector-6.h: Remove test file.
* gcc.target/powerpc/vsx-vector-6.p7.c: Remove test file.
* gcc.target/powerpc/vsx-vector-6.p8.c: Remove test file.
* gcc.target/powerpc/vsx-vector-6.p9.c: Remove test file.
---
 .../powerpc/vsx-vector-6-func-1op-run.c   |  98 
 .../powerpc/vsx-vector-6-func-1op.c   |  22 ++
 .../powerpc/vsx-vector-6-func-1op.h   |  43 
 .../powerpc/vsx-vector-6-func-2lop-run.c  | 177 ++
 .../powerpc/vsx-vector-6-func-2lop.c  |  14 ++
 .../powerpc/vsx-vector-6-func-2lop.h  |  47 
 .../powerpc/vsx-vector-6-func-2op-run.c   |  96 
 .../powerpc/vsx-vector-6-func-2op.c   |  21 ++
 .../powerpc/vsx-vector-6-func-2op.h   |  42 
 .../powerpc/vsx-vector-6-func-3op-run.c   | 229 ++
 .../powerpc/vsx-vector-6-func-3op.c   |  17 ++
 .../powerpc/vsx-vector-6-func-3op.h   |  73 ++
 .../powerpc/vsx-vector-6-func-cmp-all-run.c   | 147 +++
 .../powerpc/vsx-vector-6-func-cmp-all.c   |  17 ++
 .../powerpc/vsx-vector-6-func-cmp-all.h   |  76 ++
 .../powerpc/vsx-vector-6-func-cmp-run.c   |  92 +++
 .../powerpc/vsx-vector-6-func-cmp.c   |  16 ++
 .../powerpc/vsx-vector-6-func-cmp.h   |  40 +++
 .../gcc.target/powerpc/vsx-vector-6.h | 154 
 

Re: [PATCH v4] rs6000: Update the vsx-vector-6.* tests.

2023-07-07 Thread Carl Love via Gcc-patches
On Fri, 2023-07-07 at 10:15 +0800, Kewen.Lin wrote:



> 
> > diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-func-
> > 1op-compile.c b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-func-
> > 1op-compile.c
> > new file mode 100644
> > index 000..6b7d73ed66c
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-func-1op-
> > compile.c
> 
> Nit: Maybe remove "-compile" from the name as when there is "-run"
> variant people
> are easy to realize this is for compilation, the name without "-
> compile" seems
> more neat.  With this name change, you have to update the comment
> referring it in
> its related header file accordingly.  ("sed -i 's/-compile//g' vsx-
> vector-6-func-*.h"
> recommended, similar patterns could be used for the two other
> comments below.)

Changed the compile only file names as requested.  Updated the file
names in the .h files.  Updated the Change Log file names.
 
> 
> > @@ -0,0 +1,22 @@
> > +/* { dg-do compile { target lp64 } } */
> > +/* { dg-require-effective-target powerpc_vsx_ok } */
> > +/* { dg-options "-O2 -save-temps -mvsx" } */
> 
> Nit: We don't need "-save-temps" any more for all the test cases in
> this patch.
> 
Yup, -save-temps is on automatically for compile only and we are not
checking instructions in the run file.  Removed all of the -save-temps
directives.

> > +
> > +/* This file just generates calls to the various builtins and
> > verifies the
> > +   expected number of instructions for each builtin were
> > generated.  */
> > +
> > +#include "vsx-vector-6-func-1op.h"
> > +
> > +/* { dg-final { scan-assembler-times {\mxvabssp\M} 1 } } */
> > +/* { dg-final { scan-assembler-times {\mxvrspip\M} 1 } } */
> > +/* { dg-final { scan-assembler-times {\mxvrspim\M} 1 } } */
> > +/* { dg-final { scan-assembler-times {\mxvrspi\M} 1 } } */ 
> > +/* { dg-final { scan-assembler-times {\mxvrspic\M} 1 } } */
> > +/* { dg-final { scan-assembler-times {\mxvrspiz\M} 1 } } */
> > +/* { dg-final { scan-assembler-times {\mxvabsdp\M} 1 } } */
> > +/* { dg-final { scan-assembler-times {\mxvrdpip\M} 1 } } */
> > +/* { dg-final { scan-assembler-times {\mxvrdpim\M} 1 } } */
> > +/* { dg-final { scan-assembler-times {\mxvrdpi\M} 1 } } */
> > +/* { dg-final { scan-assembler-times {\mxvrdpic\M} 1 } } */
> > +/* { dg-final { scan-assembler-times {\mxvrdpiz\M} 1 } } */
> > +/* { dg-final { scan-assembler-times {\mxvsqrtdp\M} 1 } } */
> > diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-func-
> > 1op-run.c b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-func-1op-
> > run.c
> > new file mode 100644
> > index 000..150e372e428
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-func-1op-run.c
> > @@ -0,0 +1,98 @@
> > +/* { dg-do run { target lp64 } } */
> > +/* { dg-require-effective-target powerpc_vsx_ok } */
> 
> We need vsx_hw for those *-run.c cases instead, as powerpc_vsx_ok
> doesn't guarantee the test env can support vsx instructions, it just
> ensures it can be compiled.
> 
> /* { dg-require-effective-target vsx_hw } */
> 
> All "*-run.c" cases need changes.

Updated the run cases to use vsx_hw, removed powerpc_vsx_ok.

 Carl 



Re: [V1][PATCH 0/3] New attribute "element_count" to annotate bounds for C99 FAM(PR108896)

2023-07-07 Thread Qing Zhao via Gcc-patches
The following is the updated documentation on this new attribute, please let me 
know any suggestion and comment:

==

'counted_by (COUNT)'
 The 'counted_by' attribute may be attached to the flexible array
 member of a structure.  It indicates that the number of the
 elements of the array is given by the field named "COUNT" in the
 same structure as the flexible array member.  GCC uses this
 information to improve the results of
 '__builtin_dynamic_object_size' and array bound sanitizer.

 For instance, the following declaration:

  struct P {
size_t count;
int array[] __attribute__ ((counted_by (count)));
  } *p;

 specifies that the 'array' is a flexible array member whose number
 of elements is given by the field 'count' in the same structure.

 The field that represents the number of the elements should have an
 integer type.  An explicit 'counted_by' annotation defines a
 relationship between two objects, 'p->array' and 'p->count', that
 'p->array' has 'p->count' number of elements available.  This
 relationship must hold even after any of these related objects are
 updated.  It's the user's responsibility to make sure this
 relationship is maintained at all times.  Otherwise the results of the
 '__builtin_dynamic_object_size' and array bound sanitizer might be
 incorrect.

 For instance, the following 2nd update to the field 'count' of the
 above structure will permit out-of-bounds access to the array
 'sbuf->array':

  struct P *sbuf;
  void alloc_buf (size_t nelems)
  {
sbuf = (struct P *) malloc (sizeof (struct P) + sizeof (int) * nelems);
sbuf->count = nelems;
  }
  void use_buf (int index)
  {
sbuf->count++;
/* Now the value of sbuf->count and the number
   of elements of sbuf->array is out of sync.  */
sbuf->array[index] = 0;
/* then the out-of-bound access to this array
   might not be detected.  */
  }

 The users can use the warning option '-Wcounted-by-attribute' to
 detect such user errors during compilation time, or the sanitizer
 option '-fsanitize=counted-by-attribute' to detect such user errors
 during runtime.

=

Qing

> On Jul 7, 2023, at 11:47 AM, Qing Zhao via Gcc-patches 
>  wrote:
> 
> 
> 
>> On Jul 6, 2023, at 5:10 PM, Martin Uecker  wrote:
>> 
>> Am Donnerstag, dem 06.07.2023 um 18:56 + schrieb Qing Zhao:
>>> Hi, Kees,
>>> 
>>> I have updated my V1 patch with the following changes:
>>> A. changed the name to "counted_by"
>>> B. changed the argument from a string to an identifier
>>> C. updated the documentation and testing cases accordingly.
>>> 
>>> And then used this new gcc to test 
>>> https://github.com/kees/kernel-tools/blob/trunk/fortify/array-bounds.c 
>>> (with the following change)
>>> [opc@qinzhao-ol8u3-x86 Kees]$ !1091
>>> diff array-bounds.c array-bounds.c.org
>>> 32c32
>>> < # define __counted_by(member) __attribute__((counted_by (member)))
>>> ---
 # define __counted_by(member)  
 __attribute__((__element_count__(#member)))
>>> 34c34
>>> < # define __counted_by(member)   __attribute__((counted_by (member)))
>>> ---
 # define __counted_by(member)  /* 
 __attribute__((__element_count__(#member))) */
>>> 
>>> Then I got the following result:
>>> [opc@qinzhao-ol8u3-x86 Kees]$ ./array-bounds 2>&1 | grep -v ^'#'
>>> TAP version 13
>>> 1..12
>>> ok 1 global.fixed_size_seen_by_bdos
>>> ok 2 global.fixed_size_enforced_by_sanitizer
>>> not ok 3 global.unknown_size_unknown_to_bdos
>>> not ok 4 global.unknown_size_ignored_by_sanitizer
>>> ok 5 global.alloc_size_seen_by_bdos
>>> ok 6 global.alloc_size_enforced_by_sanitizer
>>> not ok 7 global.element_count_seen_by_bdos
>>> ok 8 global.element_count_enforced_by_sanitizer
>>> not ok 9 global.alloc_size_with_smaller_element_count_seen_by_bdos
>>> not ok 10 global.alloc_size_with_smaller_element_count_enforced_by_sanitizer
>>> ok 11 global.alloc_size_with_bigger_element_count_seen_by_bdos
>>> ok 12 global.alloc_size_with_bigger_element_count_enforced_by_sanitizer
>>> 
>>> The same as your previous results. Then I took a look at all the failed 
>>> testing: 3, 4, 7, 9, and 10. And studied the reasons for all of them.
>>> 
>>> In summary, there are two major issues:
>>> 1.  The reason for the failed testing 7 is the same issue as I observed in 
>>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109557
>>> Which is not a bug, it’s an expected behavior. 
>>> 
>>> 2. The common issue for  the failed testing 3, 4, 9, 10 is:
>>> 
>>> for the following annotated structure: 
>>> 
>>> 
>>> struct annotated {
>>>unsigned long flags;
>>>size_t foo;
>>>int array[] __attribute__((counted_by (foo)));
>>> };
>>> 
>>> 
>>> struct annotated *p;
>>> int index = 16;
>>> 
>>> p = malloc(sizeof(*p) + 

[PATCH ver 3] rs6000, fix vec_replace_unaligned built-in arguments

2023-07-07 Thread Carl Love via Gcc-patches


GCC maintainers:

Version 3, added code to altivec_resolve_overloaded_builtin so the
correct instruction is selected for the size of the second argument. 
This restores the instruction counts to the original values where the
correct instructions were originally being generated.  The naming of
the overloaded builtin instances and builtin definitions were changed
to reflect the type of the second argument since the type of the first
argument is now the same for all overloaded instances.  A new builtin
test file was added for the case where the first argument is cast to
the unsigned long long type.  This test requires the -flax-vector-
conversions gcc command line option.  Since the other tests do not
require this option, I felt that the new test needed to be in a
separate file.  Finally some formatting fixes were made in the original
test file.  Patch has been retested on Power 10 with no regressions.

Version 2, fixed various typos.  Updated the change log body to say the
instruction counts were updated.  The instruction counts changed as a
result of changing the first argument of the vec_replace_unaligned
builtin call from vector unsigned long long (vull) to vector unsigned
char (vuc).  When the first argument was vull the builtin call
generated the vinsd instruction for the two test cases.  The updated
call with vuc as the first argument generates two vinsw instructions
instead.  Patch was retested on Power 10 with no regressions.

The following patch fixes the first argument in the builtin definition
and the corresponding test cases.  Initially, the builtin specification
was wrong due to a cut and paste error.  The documentation was fixed in:

   commit ed3fea09b18f67e757b5768b42cb6e816626f1db
   Author: Bill Schmidt 
   Date:   Fri Feb 4 13:07:17 2022 -0600

   rs6000: Correct function prototypes for vec_replace_unaligned

   Due to a pasto error in the documentation, vec_replace_unaligned
was
   implemented with the same function prototypes as
vec_replace_elt.  It was
   intended that vec_replace_unaligned always specify output
vectors as having
   type vector unsigned char, to emphasize that elements are
potentially
   misaligned by this built-in function.  This patch corrects the
   misimplementation.


This patch fixes the arguments in the definitions and updates the
testcases accordingly.  Additionally, a few minor spacing issues are
fixed.

The patch has been tested on Power 10 with no regressions.  Please let
me know if the patch is acceptable for mainline.  Thanks.

 Carl 

--
rs6000, fix vec_replace_unaligned built-in arguments

The first argument of the vec_replace_unaligned built-in should always be
unsigned char, as specified in gcc/doc/extend.texi.

This patch fixes the builtin definitions and updates the test cases to use
the correct arguments.  The original test file is renamed and a second test
file is added for a new test case.

gcc/ChangeLog:
* config/rs6000/rs6000-builtins.def: Rename
__builtin_altivec_vreplace_un_uv2di as __builtin_altivec_vreplace_un_udi
__builtin_altivec_vreplace_un_uv4si as __builtin_altivec_vreplace_un_usi
__builtin_altivec_vreplace_un_v2df as __builtin_altivec_vreplace_un_df
__builtin_altivec_vreplace_un_v2di as __builtin_altivec_vreplace_un_di
__builtin_altivec_vreplace_un_v4sf as __builtin_altivec_vreplace_un_sf
__builtin_altivec_vreplace_un_v4si as __builtin_altivec_vreplace_un_si.
Rename VREPLACE_UN_UV2DI as VREPLACE_UN_UDI, VREPLACE_UN_UV4SI as
VREPLACE_UN_USI, VREPLACE_UN_V2DF as VREPLACE_UN_DF,
VREPLACE_UN_V2DI as VREPLACE_UN_DI, VREPLACE_UN_V4SF as
VREPLACE_UN_SF, VREPLACE_UN_V4SI as VREPLACE_UN_SI.
Rename vreplace_un_v2di as vreplace_un_di, vreplace_un_v4si as
vreplace_un_si, vreplace_un_v2df as vreplace_un_df,
vreplace_un_v2di as vreplace_un_di, vreplace_un_v4sf as
vreplace_un_sf, vreplace_un_v4si as vreplace_un_si.
* config/rs6000/rs6000-c.cc (find_instance): Add new argument
nargs.  Add nargs check.  Extend function to handle three arguments.
(altivec_resolve_overloaded_builtin): Add new argument nargs to
function calls.  Add case RS6000_OVLD_VEC_REPLACE_UN.
* config/rs6000/rs6000-overload.def (__builtin_vec_replace_un):
Fix first argument type.  Rename VREPLACE_UN_UV4SI as
VREPLACE_UN_USI, VREPLACE_UN_V4SI as VREPLACE_UN_SI,
VREPLACE_UN_UV2DI as VREPLACE_UN_UDI, VREPLACE_UN_V2DI as
VREPLACE_UN_DI, VREPLACE_UN_V4SF as VREPLACE_UN_SF,
VREPLACE_UN_V2DF as VREPLACE_UN_DF.
* config/rs6000/vsx.md (VEC_RU): New mode iterator.
(VEC_RU_char): New mode attribute.
(vreplace_un_): Change iterator and mode attribute.

gcc/testsuite/ChangeLog:
* gcc.target/powerpc/vec-replace-word-runnable.c: Renamed
vec-replace-word-runnable_1.c.
* 

Re: [PATCH] rs6000, fix vec_replace_unaligned builtin arguments

2023-07-07 Thread Carl Love via Gcc-patches
Kewen:

On Mon, 2023-06-19 at 11:50 +0800, Kewen.Lin wrote:
> > generated the vinsd instruction for the two calls with the first
> > argument of unsigned long long int.  When the first argument of the
> > builtin is changed to the correct type, vector unsigned char the
> > builtin generates the vinsw instruction instead.  The change occurs
> > in
> > two places resulting in reducing the counts for vinsd by two and
> > increasing the counts for vinsw by two.  The other calls to the
> > builtin
> > are either vector ints or vector floats which generate the vinsw
> > instruction.  Changing the first argument in those calls to vector
> > unsigned char still generate the vinsw instruction.
> 
> But it did expose something odd and needed to be handled in this
> change.
> I had a further check, for the below test case:
> 
> #include "altivec.h"
> 
> #ifdef ORIG
> vector unsigned char foo (vector unsigned long long v){
>   unsigned long long val = 678ull;
>   return vec_replace_unaligned (v, val, 7);
> }
> #else
> vector unsigned char foo (vector unsigned long long v){
>   unsigned long long val = 678ull;
>   return vec_replace_unaligned ((vector unsigned char)v, val, 7);
> }
> #endif
> 
> Without this patch (-DORIG required to match the previous prototype),
> it would generate vinsd; while with this proposed patch, it would
> generate vinsw.  I think it's unexpected since users can still have
> the need to replace a doubleword size of chunk but give a constant
> which can be represented by int.  The previous way can support it,
> while the new way can't.  So we should have some way to distinguish
> it, we have some special-casing in function
> altivec_resolve_overloaded_builtin, could you have a check and try
> there?  Thanks!

I added the needed handling in altivec_resolve_overloaded_builtin to
address the issue with the built-in generating the correct instruction
for the unsigned long long cases in the test file.  I added an
additional test file with the above test case.  It was put into a new
test file as it requires the -flax-vector-conversions argument.  I felt
that it was best to separate the tests that need/do not need the -flax-
vector-conversions argument.

Note, adding the additional case statement RS6000_OVLD_VEC_REPLACE_UN
to handle the three argument built-in vec_replace_unaligned in
altivec_resolve_overloaded_builtin exposed an issue with function
find_instance.  Function find_instance assumes there are only two
arguments in the builtin.  There are no checks on the actual number of
arguments used by the built-in. This leads to an error in
tree_operand_check_failed() when using find_instance.  The find_instance
function was extended to handle 2 or 3 arguments with a check to make
sure the number of arguments is either 2 or 3.

FYI, I also noticed in the current patch the names in rs6000-
builtins.def and rs6000-overload.def for builtin_altivec_vreplace_un
still reflect the type of the first argument.  The current patch
changes the first argument to vuc, but the naming didn't all get
updated.  I think the names should be changed to reflect the name of
the second argument since the first arguments are all identical.  For
example:
 
-- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -3388,29 +3388,29 @@
   const vull __builtin_altivec_vpextd (vull, vull);
 VPEXTD vpextd {}
 
   -  const vuc __builtin_altivec_vreplace_un_uv2di (vull, unsigned long long, \
   - const int<4>);
   -VREPLACE_UN_UV2DI vreplace_un_v2di {}
   +  const vuc __builtin_altivec_vreplace_un_udi (vuc, unsigned long long, \
   +   const int<4>);
   +VREPLACE_UN_UDI vreplace_un_di {}
 
 The name changes will ripple thru files rs6000-builtins.def, rs6000-
 overload.def and vsx.md.

I did all the naming as well in the new version 3 of the patch.

 Carl 



[committed] d: Fix PR 108842: Cannot use enum array with -fno-druntime

2023-07-07 Thread Iain Buclaw via Gcc-patches
Hi,

This patch restricts generating of CONST_DECLs for D manifest constants
to just scalars without pointers.  It shouldn't happen that a reference
to a manifest constant has not been expanded within a function body
during codegen, but it has been found to occur in older versions of the
D front-end (PR98277), so if the decl of a non-scalar constant is
requested, just return its initializer as an expression.

Bootstrapped and regression tested on x86_64-linux-gnu/-m32, committed to
mainline, and backported to the gcc-11, gcc-12, and gcc-13 release
branches.

Regards,
Iain.

---
PR d/108842

gcc/d/ChangeLog:

* decl.cc (DeclVisitor::visit (VarDeclaration *)): Only emit scalar
manifest constants.
(get_symbol_decl): Don't generate CONST_DECL for non-scalar manifest
constants.
* imports.cc (ImportVisitor::visit (VarDeclaration *)): New method.

gcc/testsuite/ChangeLog:

* gdc.dg/pr98277.d: Add more tests.
* gdc.dg/pr108842.d: New test.
---
 gcc/d/decl.cc   | 36 +++--
 gcc/d/imports.cc|  9 +
 gcc/testsuite/gdc.dg/pr108842.d |  4 
 gcc/testsuite/gdc.dg/pr98277.d  | 11 ++
 4 files changed, 45 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gdc.dg/pr108842.d

diff --git a/gcc/d/decl.cc b/gcc/d/decl.cc
index 3f980851259..0375ede082b 100644
--- a/gcc/d/decl.cc
+++ b/gcc/d/decl.cc
@@ -782,7 +782,7 @@ public:
   {
/* Do not store variables we cannot take the address of,
   but keep the values for purposes of debugging.  */
-   if (!d->type->isscalar ())
+   if (d->type->isscalar () && !d->type->hasPointers ())
  {
tree decl = get_symbol_decl (d);
d_pushdecl (decl);
@@ -1212,6 +1212,20 @@ get_symbol_decl (Declaration *decl)
   return decl->csym;
 }
 
+  if (VarDeclaration *vd = decl->isVarDeclaration ())
+{
+  /* CONST_DECL was initially intended for enumerals and may be used for
+scalars in general, but not for aggregates.  Here a non-constant
+value is generated anyway so as its value can be used.  */
+  if (!vd->canTakeAddressOf () && !vd->type->isscalar ())
+   {
+ gcc_assert (vd->_init && !vd->_init->isVoidInitializer ());
+ Expression *ie = initializerToExpression (vd->_init);
+ decl->csym = build_expr (ie, false);
+ return decl->csym;
+   }
+}
+
   /* Build the tree for the symbol.  */
   FuncDeclaration *fd = decl->isFuncDeclaration ();
   if (fd)
@@ -1259,23 +1273,15 @@ get_symbol_decl (Declaration *decl)
   if (vd->storage_class & STCextern)
DECL_EXTERNAL (decl->csym) = 1;
 
-  /* CONST_DECL was initially intended for enumerals and may be used for
-scalars in general, but not for aggregates.  Here a non-constant
-value is generated anyway so as the CONST_DECL only serves as a
-placeholder for the value, however the DECL itself should never be
-referenced in any generated code, or passed to the back-end.  */
-  if (vd->storage_class & STCmanifest)
+  if (!vd->canTakeAddressOf ())
{
  /* Cannot make an expression out of a void initializer.  */
- if (vd->_init && !vd->_init->isVoidInitializer ())
-   {
- Expression *ie = initializerToExpression (vd->_init);
+ gcc_assert (vd->_init && !vd->_init->isVoidInitializer ());
+ /* Non-scalar manifest constants have already been dealt with.  */
+ gcc_assert (vd->type->isscalar ());
 
- if (!vd->type->isscalar ())
-   DECL_INITIAL (decl->csym) = build_expr (ie, false);
- else
-   DECL_INITIAL (decl->csym) = build_expr (ie, true);
-   }
+ Expression *ie = initializerToExpression (vd->_init);
+ DECL_INITIAL (decl->csym) = build_expr (ie, true);
}
 
   /* [type-qualifiers/const-and-immutable]
diff --git a/gcc/d/imports.cc b/gcc/d/imports.cc
index 2efef4ed54f..3172b799cb0 100644
--- a/gcc/d/imports.cc
+++ b/gcc/d/imports.cc
@@ -127,6 +127,15 @@ public:
 this->result_ = this->make_import (TYPE_STUB_DECL (type));
   }
 
+  void visit (VarDeclaration *d) final override
+  {
+/* Not all kinds of manifest constants create a CONST_DECL.  */
+if (!d->canTakeAddressOf () && !d->type->isscalar ())
+  return;
+
+visit ((Declaration *) d);
+  }
+
   /* For now, ignore importing other kinds of dsymbols.  */
   void visit (ScopeDsymbol *) final override
   {
diff --git a/gcc/testsuite/gdc.dg/pr108842.d b/gcc/testsuite/gdc.dg/pr108842.d
new file mode 100644
index 000..5aae9e5000d
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/pr108842.d
@@ -0,0 +1,4 @@
+// { dg-do compile }
+// { dg-options "-fno-rtti" }
+module object;
+enum int[] x = [0, 1, 2];
diff --git a/gcc/testsuite/gdc.dg/pr98277.d b/gcc/testsuite/gdc.dg/pr98277.d
index 0dff142a6ef..c88c735dec8 100644
--- 

[PATCH] Fortran: simplification of FINDLOC for constant complex arguments [PR110585]

2023-07-07 Thread Harald Anlauf via Gcc-patches
Dear all,

I intend to commit the attached obvious patch within 24h unless
someone objects.  gfc_compare_expr() did not handle the case of
complex constants, which may be compared for equality.  This
case is needed in the simplification of the FINDLOC intrinsic.

Regtested on x86_64-pc-linux-gnu.

Thanks,
Harald

From b6c4f70d2dac4863335874f0bd3486ea7db348d7 Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Fri, 7 Jul 2023 20:25:06 +0200
Subject: [PATCH] Fortran: simplification of FINDLOC for constant complex
 arguments [PR110585]

gcc/fortran/ChangeLog:

	PR fortran/110585
	* arith.cc (gfc_compare_expr): Handle equality comparison of constant
	complex gfc_expr arguments.

gcc/testsuite/ChangeLog:

	PR fortran/110585
	* gfortran.dg/findloc_9.f90: New test.
---
 gcc/fortran/arith.cc|  5 +
 gcc/testsuite/gfortran.dg/findloc_9.f90 | 19 +++
 2 files changed, 24 insertions(+)
 create mode 100644 gcc/testsuite/gfortran.dg/findloc_9.f90

diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc
index 86d56406047..f9c6658f860 100644
--- a/gcc/fortran/arith.cc
+++ b/gcc/fortran/arith.cc
@@ -1120,6 +1120,11 @@ gfc_compare_expr (gfc_expr *op1, gfc_expr *op2, gfc_intrinsic_op op)
 	|| (op1->value.logical && !op2->value.logical));
   break;

+case BT_COMPLEX:
+  gcc_assert (op == INTRINSIC_EQ);
+  rc = mpc_cmp (op1->value.complex, op2->value.complex);
+  break;
+
 default:
   gfc_internal_error ("gfc_compare_expr(): Bad basic type");
 }
diff --git a/gcc/testsuite/gfortran.dg/findloc_9.f90 b/gcc/testsuite/gfortran.dg/findloc_9.f90
new file mode 100644
index 000..05974476cb3
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/findloc_9.f90
@@ -0,0 +1,19 @@
+! { dg-do compile }
+! { dg-options "-fdump-tree-original" }
+! PR fortran/110585 - simplification of FINDLOC for constant complex arguments
+
+program mvce
+  implicit none
+  integer, parameter :: a(*) = findloc([(1.,0.),(2.,1.)], (2.,0.))
+  integer, parameter :: b(*) = findloc([(1.,0.),(2.,1.)], (2.,0.), back=.true.)
+  integer, parameter :: c(*) = findloc([(1.,0.),(2.,1.)], (2.,1.))
+  integer, parameter :: d(*) = findloc([(1.,0.),(2.,1.)], (2.,1.), back=.true.)
+  integer, parameter :: e= findloc([(1.,0.),(2.,1.)], (2.,1.), dim=1)
+  if (a(1) /= 0) stop 1
+  if (b(1) /= 0) stop 2
+  if (c(1) /= 2) stop 3
+  if (d(1) /= 2) stop 4
+  if (e/= 2) stop 5
+end
+
+! { dg-final { scan-tree-dump-not "_gfortran_stop_numeric" "original" } }
--
2.35.3



Re: [PATCH] Fortran: fixes for procedures with ALLOCATABLE,INTENT(OUT) arguments [PR92178]

2023-07-07 Thread Harald Anlauf via Gcc-patches

Hi Mikael,

Am 07.07.23 um 14:21 schrieb Mikael Morin:

I'm attaching what I have (lightly) tested so far, which doesn't work.
It seems gfc_conv_class_to_class reevaluates part of the original
expression, which is not correct after deallocation.


this looks much more elegant than my attempt that passed an additional
argument to gfc_conv_class_to_class, to achieve what your patch does.


Will have a look again tonight.


Great.

Harald




[PATCH] Fix PR 110539: missed optimization after moving two_value to match.pd

2023-07-07 Thread Andrew Pinski via Gcc-patches
When I moved two_value to match.pd, I removed the check for the {0,+-1}
as I had placed it after the {0,+-1} case for cond in match.pd.
In the case of {0,+-1} and non boolean, before we would optimize those
cases to just `(convert)a` but after we would get `(convert)(a != 0)`
which was not handled anyways to just `(convert)a`.
So this adds a pattern to match `(convert)(zeroone != 0)` and simplify
to `(convert)zeroone`.

In the bug report, we do finally optimize `(convert)(zeroone != 0)` to
`zeroone` in VRP2 but that in itself is too late and we miss other
optimizations that would have happened.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

PR tree-optimization/110539
* match.pd ((convert)(zeroone !=/== 0)): Match
and simplify to ((convert)zeroone)){,^1}.

gcc/testsuite/ChangeLog:

PR tree-optimization/110539
* gcc.dg/tree-ssa/pr110539-1.c: New test.
* gcc.dg/tree-ssa/pr110539-2.c: New test.
* gcc.dg/tree-ssa/pr110539-3.c: New test.
---
 gcc/match.pd   | 15 +
 gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c | 12 
 gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c | 12 
 gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c | 70 ++
 4 files changed, 109 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c

diff --git a/gcc/match.pd b/gcc/match.pd
index c709153217a..87767a7778b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2060,6 +2060,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(vec_cond:s (icmp@1 @4 @5) @3 integer_zerop))
 (vec_cond @0 @2 @3)))
 
+#if GIMPLE
+/* This cannot be done on generic as fold has the
+   exact opposite transformation:
+   `Fold ~X & 1 as (X & 1) == 0.`
+   `Fold (X ^ 1) & 1 as (X & 1) == 0.`  */
+/* (convert)(zeroone != 0) into (convert)zeroone */
+/* (convert)(zeroone == 0) into (convert)(zeroone^1) */
+(for neeq (ne eq)
+ (simplify
+  (convert (neeq zero_one_valued_p@0 integer_zerop))
+  (if (neeq == NE_EXPR)
+   (convert @0)
+   (convert (bit_xor @0 { build_one_cst (TREE_TYPE (@0)); } )
+#endif
+
 /* Transform X & -Y into X * Y when Y is { 0 or 1 }.  */
 (simplify
  (bit_and:c (convert? (negate zero_one_valued_p@0)) @1)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c
new file mode 100644
index 000..6ba864cdd13
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized" } */
+int f(int a)
+{
+int b = a & 1;
+int c = b != 0;
+return c == b;
+}
+
+/* This should be optimized to just return 1; */
+/* { dg-final { scan-tree-dump-not " == " "optimized"} } */
+/* { dg-final { scan-tree-dump "return 1;" "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c
new file mode 100644
index 000..17874d349ef
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized" } */
+int f(int a)
+{
+int b = a & 1;
+int c = b == 0;
+return c == b;
+}
+
+/* This should be optimized to just return 0; */
+/* { dg-final { scan-tree-dump-not " == " "optimized"} } */
+/* { dg-final { scan-tree-dump "return 0;" "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c
new file mode 100644
index 000..c8ef6f56dcd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void foo(void);
+static int a, c = 1;
+static short b;
+static int *d = &c, *e = &a;
+static int **f = &d;
+void __assert_fail() __attribute__((__noreturn__));
+static void g(short h) {
+if (*d)
+;
+else {
+if (e) __assert_fail();
+if (a) {
+__builtin_unreachable();
+} else
+__assert_fail();
+}
+if ((((0, 0) || h) == h) + b) *f = 0;
+}
+int main() {
+int i = 0 != 10 & a;
+g(i);
+*e = 9;
+e = 0;
+if (d == 0)
+;
+else
+foo();
+;
+}
+/* The call to foo should be optimized away. */
+/* The missed optimization at -O2 here was:
+int b = a & 1;
+int c = b != 0;
+int d = c == b;
+  not being optimized to 1 early enough, it is done in vrp2 but
+  that is too late.
+  In phiopt2 we got:
+_17 = i_7 != 0;
+_12 = (int) _17;
+if (i_7 == _12)
+  goto ; [50.00%]
+else
+  goto ; [50.00%]
+
+ [local count: 268435456]:
+d = 0B;
+
+ [local count: 536870913]:
+e.1_3 = e;
+*e.1_3 = 9;
+e = 0B;
+d.2_4 = d;
+if (d.2_4 == 0B)
+
+  The first if is not optimized before, until 

Re: [PATCH] rs6000: Don't ICE when generating vector pair load/store insns [PR110411]

2023-07-07 Thread Peter Bergner via Gcc-patches
On 7/6/23 6:28 PM, Segher Boessenkool wrote:
> On Thu, Jul 06, 2023 at 02:48:19PM -0500, Peter Bergner wrote:
>> On 7/6/23 12:33 PM, Segher Boessenkool wrote:
>>> On Wed, Jul 05, 2023 at 05:21:18PM +0530, P Jeevitha wrote:
 --- a/gcc/config/rs6000/rs6000.cc
 +++ b/gcc/config/rs6000/rs6000.cc
 @@ -9894,6 +9894,8 @@ rs6000_legitimate_address_p (machine_mode mode, rtx 
 x, bool reg_ok_strict)
  
/* Handle unaligned altivec lvx/stvx type addresses.  */
if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
 +  && mode !=  OOmode
 +  && mode !=  XOmode
&& GET_CODE (x) == AND
&& CONST_INT_P (XEXP (x, 1))
&& INTVAL (XEXP (x, 1)) == -16)
>>>
>>> Why do we need this for OOmode and XOmode here, but not for the other
>>> modes that are equally not allowed?  That makes no sense.
>>
>> VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) already filters those modes out
>> (eg, SImode, DFmode, etc.), just not OOmode and XOmode, since those both
>> are modes used in/with VSX registers.
> 
> It does not filter anything out, no.  That simply checks if a datum of
> that mode can be loaded into vector registers or not.  For example
> SImode could very well be loaded into vector registers!  (It just is not
> such a great idea).

I spent some time looking at how the compiler fixes this up in the
-mno-block-ops-vector-pair case and I see the constraints used in the
vsx_mov<mode>_64bit pattern for loads and stores disallows these types
of addresses, so LRA fixes them up for us.  Clearly movoo should do the
same and that is enough to fix the ICE.  I'll work with Jeevitha on
submitting a patch using that solution.

That said, I think it would be good to modify rs6000_legitimate_address_p
to disallow these altivec style addresses for OOmode and XOmode, since we
know early-on that they're not going to be valid, but that would be a
different patch.




> dg-do compile *does* invoke the assembler, btw.  As it should.

There is dg-do "preprocess", "compile", "assemble", "link" and "run"
(ignoring "precompile" and "repo").  Dg-do compile produces an assembly
file, but doesn't actually call the assembler, which we don't strictly
need for a test case that checks whether GCC ICEs or not.  If you want
to run the assembler too and then stop, then you'd want dg-do assemble.

Peter




Cleanup force_edge_cold

2023-07-07 Thread Jan Hubicka via Gcc-patches
Hi,
we can use the new set_edge_probability_and_rescale_others here.

Bootstrapped/regtested x86_64-linux, committed.

Honza

gcc/ChangeLog:

* predict.cc (force_edge_cold): Use
set_edge_probability_and_rescale_others; improve dumps.

diff --git a/gcc/predict.cc b/gcc/predict.cc
index d65c26f0ab2..1aa4c25eb70 100644
--- a/gcc/predict.cc
+++ b/gcc/predict.cc
@@ -4421,21 +4421,16 @@ force_edge_cold (edge e, bool impossible)
  there.  */
   else if (prob_sum > profile_probability::never ())
 {
-  if (!(e->probability < goal))
-   e->probability = goal;
-
-  profile_probability prob_comp = prob_sum / e->probability.invert ();
-
   if (dump_file && (dump_flags & TDF_DETAILS))
-   fprintf (dump_file, "Making edge %i->%i %s by redistributing "
-"probability to other edges.\n",
-e->src->index, e->dest->index,
-impossible ? "impossible" : "cold");
-  FOR_EACH_EDGE (e2, ei, e->src->succs)
-   if (e2 != e)
- {
-   e2->probability /= prob_comp;
- }
+   {
+ fprintf (dump_file, "Making edge %i->%i %s by redistributing "
+  "probability to other edges. Original probability: ",
+  e->src->index, e->dest->index,
+  impossible ? "impossible" : "cold");
+ e->probability.dump (dump_file);
+ fprintf (dump_file, "\n");
+   }
+  set_edge_probability_and_rescale_others (e, goal);
   if (current_ir_type () != IR_GIMPLE
  && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
update_br_prob_note (e->src);


Fix some profile consistency testcases

2023-07-07 Thread Jan Hubicka via Gcc-patches
Hi,
Information about profile mismatches is printed only with -details-blocks for 
some time.
I think it should be printed even with default to make it easier to spot when 
someone introduces
new transform that breaks the profile, but I will send separate RFC for that.

This patch enables details in all testcases that greps for Invalid sum.  There 
are 4 testcases
which fails:
  gcc.dg/tree-ssa/loop-ch-profile-1.c
 here the problem is that loop header duplication introduces loop invariant 
conditional that is later
 updated by tree-ssa-dom but dom does not take care of updating profile.
 Since loop-ch knows when it duplicates loop invariant, we may be able to 
get this right.

 The test is still useful since it tests that right after ch profile is 
consistent.
  gcc.dg/tree-prof/update-cunroll-2.c
 This is about profile updating code in duplicate_loop_body_to_header_edge 
being wrong when optimized
 out exit is not last in the loop.  In that case the probability of later 
exits needs to be accounted in.
 I will think about making this better - in general this does not seem to 
have easy solution, but for
 special case of chained tests we can definitely account for the later 
exits.
  gcc.dg/tree-ssa/update-unroll-1.c
 This fails after aprefetch invoked unrolling.  I did not look into details 
yet.
  gcc.dg/tree-prof/update-unroll-2.c
 This one seems similar as previous
I decided to xfail these tests and deal with them incrementally and filed 
PR110590.

Regtested x86_64-linux, will commit it shortly.
Honza
gcc/testsuite/ChangeLog:

* g++.dg/tree-prof/indir-call-prof.C: Add block-details to dump flags.
* gcc.dg/pr43864-2.c: Likewise.
* gcc.dg/pr43864-3.c: Likewise.
* gcc.dg/pr43864-4.c: Likewise.
* gcc.dg/pr43864.c: Likewise.
* gcc.dg/tree-prof/cold_partition_label.c: Likewise.
* gcc.dg/tree-prof/indir-call-prof.c: Likewise.
* gcc.dg/tree-prof/update-cunroll-2.c: Likewise.
* gcc.dg/tree-prof/update-tailcall.c: Likewise.
* gcc.dg/tree-prof/val-prof-1.c: Likewise.
* gcc.dg/tree-prof/val-prof-2.c: Likewise.
* gcc.dg/tree-prof/val-prof-3.c: Likewise.
* gcc.dg/tree-prof/val-prof-4.c: Likewise.
* gcc.dg/tree-prof/val-prof-5.c: Likewise.
* gcc.dg/tree-ssa/fnsplit-1.c: Likewise.
* gcc.dg/tree-ssa/loop-ch-profile-2.c: Likewise.
* gcc.dg/tree-ssa/update-threading.c: Likewise.
* gcc.dg/tree-ssa/update-unswitch-1.c: Likewise.
* gcc.dg/unroll-7.c: Likewise.
* gcc.dg/unroll-8.c: Likewise.
* gfortran.dg/pr25623-2.f90: Likewise.
* gfortran.dg/pr25623.f90: Likewise.
* gcc.dg/tree-ssa/loop-ch-profile-1.c: Likewise; xfail.
* gcc.dg/tree-ssa/update-cunroll.c: Likewise; xfail.
* gcc.dg/tree-ssa/update-unroll-1.c: Likewise; xfail.

diff --git a/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C 
b/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C
index b45417106d0..1f7404600ed 100644
--- a/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C
+++ b/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C
@@ -1,4 +1,4 @@
-/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized 
-fdump-ipa-afdo-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized-blocks-details 
-fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized" } */
 
 struct A {
   A () {}
diff --git a/gcc/testsuite/gcc.dg/pr43864-2.c b/gcc/testsuite/gcc.dg/pr43864-2.c
index 6393144ccf7..102295ad4e8 100644
--- a/gcc/testsuite/gcc.dg/pr43864-2.c
+++ b/gcc/testsuite/gcc.dg/pr43864-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-tail-merge -fdump-tree-pre" } */
+/* { dg-options "-O2 -ftree-tail-merge -fdump-tree-pre-details-blocks" } */
 
 int
 f (int c, int b, int d)
diff --git a/gcc/testsuite/gcc.dg/pr43864-3.c b/gcc/testsuite/gcc.dg/pr43864-3.c
index 24b59a14b96..f70b8764cc8 100644
--- a/gcc/testsuite/gcc.dg/pr43864-3.c
+++ b/gcc/testsuite/gcc.dg/pr43864-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-tail-merge -fdump-tree-pre" } */
+/* { dg-options "-O2 -fdump-tree-pre-blocks-details" } */
 
 /* Commutative case.  */
 
diff --git a/gcc/testsuite/gcc.dg/pr43864-4.c b/gcc/testsuite/gcc.dg/pr43864-4.c
index 8a25b0fd8ef..3de71fccfa2 100644
--- a/gcc/testsuite/gcc.dg/pr43864-4.c
+++ b/gcc/testsuite/gcc.dg/pr43864-4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-tail-merge -fdump-tree-pre" } */
+/* { dg-options "-O2 -ftree-tail-merge -fdump-tree-pre-details-blocks" } */
 
 /* Different stmt order.  */
 
diff --git a/gcc/testsuite/gcc.dg/pr43864.c b/gcc/testsuite/gcc.dg/pr43864.c
index ed69a737d3f..7bd1ba69281 100644
--- a/gcc/testsuite/gcc.dg/pr43864.c
+++ b/gcc/testsuite/gcc.dg/pr43864.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-tail-merge -fdump-tree-pre" } */
+/* { dg-options "-O2 -fdump-tree-pre-details-blocks" } */
 
 

[PATCH] libstdc++: Compile basic_file_stdio.cc for LFS

2023-07-07 Thread Jonathan Wakely via Gcc-patches
When this code was written we didn't have the <bits/largefile-config.h>
header, so it conditionally used lseek64 etc.

Since this is compiled into the library, not inline code in headers, we
can just define the relevant macros to get a 64-bit off_t and then
simplify the code.

Tested x86_64-linux. I intend to commit this next week.

-- >8 --

Instead of using fopen64, lseek64, and fstat64 we can just include
<bits/largefile-config.h> which defines _FILE_OFFSET_BITS=64 (and
similar target-specific macros). Then we can just use fopen, lseek and
fstat as normal, and they'll be the LFS versions if supported by the
target.

libstdc++-v3/ChangeLog:

* config/io/basic_file_stdio.cc: Define LFS macros.
(__basic_file::open): Use fopen unconditionally.
(get_file_offset): Use lseek unconditionally.
(__basic_file::seekoff): Likewise.
(__basic_file::showmanyc): Use fstat unconditionally.
---
 libstdc++-v3/config/io/basic_file_stdio.cc | 25 ++
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/libstdc++-v3/config/io/basic_file_stdio.cc 
b/libstdc++-v3/config/io/basic_file_stdio.cc
index 27c2ad2afe3..7b1729a798f 100644
--- a/libstdc++-v3/config/io/basic_file_stdio.cc
+++ b/libstdc++-v3/config/io/basic_file_stdio.cc
@@ -26,6 +26,7 @@
 // ISO C++ 14882: 27.8  File-based streams
 //
 
+#include 
 #include 
 #include 
 #include 
@@ -251,11 +252,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 const char* __c_mode = fopen_mode(__mode);
 if (__c_mode && !this->is_open())
   {
-#ifdef _GLIBCXX_USE_LFS
-   if ((_M_cfile = fopen64(__name, __c_mode)))
-#else
if ((_M_cfile = fopen(__name, __c_mode)))
-#endif
  {
_M_cfile_created = true;
__ret = this;
@@ -389,8 +386,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 # else
   return ftell(__f->file());
 # endif
-#elif defined(_GLIBCXX_USE_LFS)
-  return lseek64(__f->fd(), 0, (int)ios_base::cur);
 #else
   return lseek(__f->fd(), 0, (int)ios_base::cur);
 #endif
@@ -417,11 +412,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  return -1;
   }
 return __way == ios_base::beg ? __off : std::get_file_offset(this);
-#elif defined(_GLIBCXX_USE_LFS)
-return lseek64(this->fd(), __off, __way);
 #else
-if (__off > numeric_limits::max()
- || __off < numeric_limits::min())
+if _GLIBCXX17_CONSTEXPR (sizeof(streamoff) > sizeof(off_t))
+  if (__off > numeric_limits::max()
+   || __off < numeric_limits::min())
   return -1L;
 return lseek(this->fd(), __off, __way);
 #endif
@@ -455,20 +449,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 #if defined(_GLIBCXX_HAVE_S_ISREG) || defined(_GLIBCXX_HAVE_S_IFREG)
 // Regular files.
-#ifdef _GLIBCXX_USE_LFS
-struct stat64 __buffer;
-const int __err = fstat64(this->fd(), &__buffer);
+struct stat __buffer;
+const int __err = fstat(this->fd(), &__buffer);
 if (!__err && _GLIBCXX_ISREG(__buffer.st_mode))
   {
const streamoff __off = __buffer.st_size - std::get_file_offset(this);
return std::min(__off, streamoff(numeric_limits::max()));
   }
-#else
-struct stat __buffer;
-const int __err = fstat(this->fd(), &__buffer);
-if (!__err && _GLIBCXX_ISREG(__buffer.st_mode))
-  return __buffer.st_size - std::get_file_offset(this);
-#endif
 #endif
 return 0;
   }
-- 
2.41.0



[PATCH] libstdc++: Fix --enable-cstdio=stdio_pure [PR110574]

2023-07-07 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux (with --enable-cstdio=stdio_pure and without).

I intend to commit this next week.


This fixes most of the fstream failures seen with stdio_pure, although
there are still a few that fail (about half of them time out):

FAIL: 27_io/basic_filebuf/imbue/char/13171-2.cc execution test
FAIL: 27_io/basic_filebuf/seekoff/45628-2.cc execution test
FAIL: 27_io/basic_filebuf/seekoff/char/26777.cc execution test
FAIL: 27_io/basic_filebuf/seekoff/char/4.cc execution test
FAIL: 27_io/basic_filebuf/seekoff/wchar_t/4.cc execution test
FAIL: 27_io/basic_filebuf/sputbackc/char/1-io.cc execution test
FAIL: 27_io/basic_filebuf/underflow/char/10097.cc execution test
FAIL: 27_io/basic_filebuf/underflow/wchar_t/5.cc execution test
FAIL: 27_io/objects/wchar_t/12.cc execution test

-- >8 --

When configured with --enable-cstdio=stdio_pure we need to consistently
use fseek and not mix seeks on the file descriptor with reads and writes
on the FILE stream.

There are also a number of bugs related to error handling and return
values, because fread and fwrite return 0 on error, not -1, and fseek
returns 0 on success, not the file offset.

libstdc++-v3/ChangeLog:

PR libstdc++/110574
* acinclude.m4 (GLIBCXX_CHECK_LFS): Check for fseeko and ftello
and define _GLIBCXX_USE_FSEEKO_FTELLO.
* config.h.in: Regenerate.
* configure: Regenerate.
* config/io/basic_file_stdio.cc (xwrite) [_GLIBCXX_USE_STDIO_PURE]:
Check for fwrite error correctly.
(__basic_file::xsgetn) [_GLIBCXX_USE_STDIO_PURE]: Check for
fread error correctly.
(get_file_offset): New function.
(__basic_file::seekoff) [_GLIBCXX_USE_STDIO_PURE]: Use
fseeko if available. Use get_file_offset instead of return value
of fseek.
(__basic_file::showmanyc): Use get_file_offset.
---
 libstdc++-v3/acinclude.m4  | 16 ++
 libstdc++-v3/config.h.in   |  3 +
 libstdc++-v3/config/io/basic_file_stdio.cc | 66 --
 libstdc++-v3/configure | 66 ++
 4 files changed, 133 insertions(+), 18 deletions(-)

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 823832f97d4..b25378eaace 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -497,6 +497,22 @@ AC_DEFUN([GLIBCXX_CHECK_LFS], [
   if test $glibcxx_cv_LFS = yes; then
 AC_DEFINE(_GLIBCXX_USE_LFS, 1, [Define if LFS support is available.])
   fi
+
+  AC_CACHE_CHECK([for fseeko and ftello], glibcxx_cv_posix_lfs, [
+GCC_TRY_COMPILE_OR_LINK(
+  [#include 
+  ],
+  [FILE* fp;
+   fseeko(fp, 0, SEEK_CUR);
+   ftello(fp);
+  ],
+  [glibcxx_cv_posix_lfs=yes],
+  [glibcxx_cv_posix_lfs=no])
+  ])
+  if test $glibcxx_cv_posix_lfs = yes; then
+AC_DEFINE(_GLIBCXX_USE_FSEEKO_FTELLO, 1, [Define if fseeko and ftello are 
available.])
+  fi
+
   CXXFLAGS="$ac_save_CXXFLAGS"
   AC_LANG_RESTORE
 ])
diff --git a/libstdc++-v3/config.h.in b/libstdc++-v3/config.h.in
index 91eca6ef608..1c2224c1919 100644
--- a/libstdc++-v3/config.h.in
+++ b/libstdc++-v3/config.h.in
@@ -988,6 +988,9 @@
 /* Define if fchmodat is available in . */
 #undef _GLIBCXX_USE_FCHMODAT
 
+/* Define if fseeko and ftello are available. */
+#undef _GLIBCXX_USE_FSEEKO_FTELLO
+
 /* Defined if gettimeofday is available. */
 #undef _GLIBCXX_USE_GETTIMEOFDAY
 
diff --git a/libstdc++-v3/config/io/basic_file_stdio.cc 
b/libstdc++-v3/config/io/basic_file_stdio.cc
index 7de9d98c3c7..27c2ad2afe3 100644
--- a/libstdc++-v3/config/io/basic_file_stdio.cc
+++ b/libstdc++-v3/config/io/basic_file_stdio.cc
@@ -129,14 +129,15 @@ namespace
   {
 #ifdef _GLIBCXX_USE_STDIO_PURE
const std::streamsize __ret = fwrite(__s, 1, __nleft, __file);
+   if (__ret == 0 && ferror(__file))
+ break;
 #else
const std::streamsize __ret = write(__fd, __s, __nleft);
-#endif
if (__ret == -1L && errno == EINTR)
  continue;
if (__ret == -1L)
  break;
-
+#endif
__nleft -= __ret;
if (__nleft == 0)
  break;
@@ -330,13 +331,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __basic_file::xsgetn(char* __s, streamsize __n)
   {
 streamsize __ret;
-do
 #ifdef _GLIBCXX_USE_STDIO_PURE
-  __ret = fread(__s, 1, __n, this->file());
+__ret = fread(__s, 1, __n, this->file());
+if (__ret == 0 && ferror(this->file()))
+  __ret = -1;
 #else
+do
   __ret = read(this->fd(), __s, __n);
-#endif
 while (__ret == -1L && errno == EINTR);
+#endif
 return __ret;
   }
 
@@ -375,20 +378,52 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 return __ret;
   }
 
+  namespace
+  {
+inline streamoff
+get_file_offset(__basic_file* __f)
+{
+#ifdef _GLIBCXX_USE_STDIO_PURE
+# ifdef _GLIBCXX_USE_FSEEKO_FTELLO
+  return ftello(__f->file());
+# else
+  return ftell(__f->file());
+# endif
+#elif defined(_GLIBCXX_USE_LFS)
+  return lseek64(__f->fd(), 0, 

Re: [V1][PATCH 0/3] New attribute "element_count" to annotate bounds for C99 FAM(PR108896)

2023-07-07 Thread Qing Zhao via Gcc-patches


> On Jul 6, 2023, at 5:10 PM, Martin Uecker  wrote:
> 
> Am Donnerstag, dem 06.07.2023 um 18:56 + schrieb Qing Zhao:
>> Hi, Kees,
>> 
>> I have updated my V1 patch with the following changes:
>> A. changed the name to "counted_by"
>> B. changed the argument from a string to an identifier
>> C. updated the documentation and testing cases accordingly.
>> 
>> And then used this new gcc to test 
>> https://github.com/kees/kernel-tools/blob/trunk/fortify/array-bounds.c (with 
>> the following change)
>> [opc@qinzhao-ol8u3-x86 Kees]$ !1091
>> diff array-bounds.c array-bounds.c.org
>> 32c32
>> < # define __counted_by(member)  __attribute__((counted_by (member)))
>> ---
>>> # define __counted_by(member)   
>>> __attribute__((__element_count__(#member)))
>> 34c34
>> < # define __counted_by(member)   __attribute__((counted_by (member)))
>> ---
>>> # define __counted_by(member)   /* 
>>> __attribute__((__element_count__(#member))) */
>> 
>> Then I got the following result:
>> [opc@qinzhao-ol8u3-x86 Kees]$ ./array-bounds 2>&1 | grep -v ^'#'
>> TAP version 13
>> 1..12
>> ok 1 global.fixed_size_seen_by_bdos
>> ok 2 global.fixed_size_enforced_by_sanitizer
>> not ok 3 global.unknown_size_unknown_to_bdos
>> not ok 4 global.unknown_size_ignored_by_sanitizer
>> ok 5 global.alloc_size_seen_by_bdos
>> ok 6 global.alloc_size_enforced_by_sanitizer
>> not ok 7 global.element_count_seen_by_bdos
>> ok 8 global.element_count_enforced_by_sanitizer
>> not ok 9 global.alloc_size_with_smaller_element_count_seen_by_bdos
>> not ok 10 global.alloc_size_with_smaller_element_count_enforced_by_sanitizer
>> ok 11 global.alloc_size_with_bigger_element_count_seen_by_bdos
>> ok 12 global.alloc_size_with_bigger_element_count_enforced_by_sanitizer
>> 
>> The same as your previous results. Then I took a look at all the failed 
>> testing: 3, 4, 7, 9, and 10. And studied the reasons for all of them.
>> 
>> In summary, there are two major issues:
>> 1.  The reason for the failed testing 7 is the same issue as I observed in 
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109557
>> Which is not a bug, it’s an expected behavior. 
>> 
>> 2. The common issue for  the failed testing 3, 4, 9, 10 is:
>> 
>> for the following annotated structure: 
>> 
>> 
>> struct annotated {
>> unsigned long flags;
>> size_t foo;
>> int array[] __attribute__((counted_by (foo)));
>> };
>> 
>> 
>> struct annotated *p;
>> int index = 16;
>> 
>> p = malloc(sizeof(*p) + index * sizeof(*p->array));  // allocated real size 
>> 
>> p->foo = index + 2;  // p->foo was set by a different value than the real 
>> size of p->array as in test 9 and 10
>> or
>> p->foo was not set to any value as in test 3 and 4
>> 
>> 
>> 
>> i.e, the value of p->foo is NOT synced with the number of elements allocated 
>> for the array p->array.  
>> 
>> I think that this should be considered a user error, and the 
>> documentation of the attribute should include
>> this requirement.  (In LLVM’s RFC, such a requirement was included in the 
>> programming model: 
>> https://discourse.llvm.org/t/rfc-enforcing-bounds-safety-in-c-fbounds-safety/70854#maintaining-correctness-of-bounds-annotations-18)
>> 
>> We can add a new warning option -Wcounted-by to report such user error if 
>> needed.
>> 
>> What’s your opinion on this?
> 
> 
> Additionally, we could also have a sanitizer that
> checks this at run-time.

Yes, that’s also a nice feature to have. 
I think that the main point here is to catch such user errors during 
compilation time or run time. 

I will add one or two separate patches for this compile-time warning and 
the sanitizer feature.


> 
> Personally, I am still not very happy that in the
> following example the two 'n's refer to different
> entities:
> 
> void f(int n)
> {
>struct foo {
>int n;   
>int (*p[])[n] [[counted_by(n)]];
>};
> }
> 
Me either )-:


> But I guess it will be difficult to convince everybody
> that it would be wise to use a new syntax for
> disambiguation:
> 
> void f(int n)
> {
>struct foo {
>int n;   
>int (*p[])[n] [[counted_by(.n)]];
>};
> }
> 

I guess that it’s quite hard to convince everyone that the new syntax is the 
best solution at this moment. 
And it might not be worth the effort at this time.

We can do the new syntax later if necessary.

thanks.

Qing

> Martin
> 
> 
>> 
>> thanks.
>> 
>> Qing
>> 
>> 
>>> On May 26, 2023, at 4:40 PM, Kees Cook  wrote:
>>> 
>>> On Thu, May 25, 2023 at 04:14:47PM +, Qing Zhao wrote:
 GCC will pass the number of elements info from the attached attribute to 
 both 
 __builtin_dynamic_object_size and bounds sanitizer to check the 
 out-of-bounds
 or dynamic object size issues during runtime for flexible array members.
 
 This new feature will provide nice protection to flexible array members 
 (which
 currently are completely ignored by both __builtin_dynamic_object_size and
 bounds 

[PATCH v2] x86: Properly find the maximum stack slot alignment

2023-07-07 Thread H.J. Lu via Gcc-patches
Don't assume that stack slots can only be accessed by stack or frame
registers.  We first find all registers defined by stack or frame
registers.  Then check memory accesses by such registers, including
stack and frame registers.

gcc/

PR target/109780
* config/i386/i386.cc (ix86_update_stack_alignment): New.
(ix86_find_all_reg_use): Likewise.
(ix86_find_max_used_stack_alignment): Also check memory accesses
from registers defined by stack or frame registers.

gcc/testsuite/

PR target/109780
* g++.target/i386/pr109780-1.C: New test.
* gcc.target/i386/pr109780-1.c: Likewise.
* gcc.target/i386/pr109780-2.c: Likewise.
---
 gcc/config/i386/i386.cc| 120 +
 gcc/testsuite/g++.target/i386/pr109780-1.C |  72 +
 gcc/testsuite/gcc.target/i386/pr109780-1.c |  14 +++
 gcc/testsuite/gcc.target/i386/pr109780-2.c |  21 
 4 files changed, 206 insertions(+), 21 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C
 create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index caca74d6dec..27f349b0ccb 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -8084,6 +8084,63 @@ output_probe_stack_range (rtx reg, rtx end)
   return "";
 }
 
+/* Update the maximum stack slot alignment from memory alignment in
+   PAT.  */
+
+static void
+ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
+{
+  /* This insn may reference stack slot.  Update the maximum stack slot
+ alignment.  */
+  subrtx_iterator::array_type array;
+  FOR_EACH_SUBRTX (iter, array, pat, ALL)
+if (MEM_P (*iter))
+  {
+   unsigned int alignment = MEM_ALIGN (*iter);
+   unsigned int *stack_alignment
+ = (unsigned int *) data;
+   if (alignment > *stack_alignment)
+ *stack_alignment = alignment;
+   break;
+  }
+}
+
+/* Find all registers defined with REG.  */
+
+static void
+ix86_find_all_reg_use (HARD_REG_SET _slot_access, int reg)
+{
+  for (df_ref ref = DF_REG_USE_CHAIN (reg);
+   ref != NULL;
+   ref = DF_REF_NEXT_REG (ref))
+{
+  if (DF_REF_IS_ARTIFICIAL (ref))
+   continue;
+
+  rtx_insn *insn = DF_REF_INSN (ref);
+  if (!NONDEBUG_INSN_P (insn))
+   continue;
+
+  rtx set = single_set (insn);
+  if (!set)
+   continue;
+
+  rtx src = SET_SRC (set);
+  if (MEM_P (src))
+   continue;
+
+  rtx dest = SET_DEST (set);
+  if (!REG_P (dest))
+   continue;
+
+  if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest)))
+   continue;
+
+  /* Add this register to stack_slot_access.  */
+  add_to_hard_reg_set (_slot_access, Pmode, REGNO (dest));
+}
+}
+
 /* Set stack_frame_required to false if stack frame isn't required.
Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
slot used if stack frame is required and CHECK_STACK_SLOT is true.  */
@@ -8102,10 +8159,6 @@ ix86_find_max_used_stack_alignment (unsigned int 
_alignment,
   add_to_hard_reg_set (_up_by_prologue, Pmode,
   HARD_FRAME_POINTER_REGNUM);
 
-  /* The preferred stack alignment is the minimum stack alignment.  */
-  if (stack_alignment > crtl->preferred_stack_boundary)
-stack_alignment = crtl->preferred_stack_boundary;
-
   bool require_stack_frame = false;
 
   FOR_EACH_BB_FN (bb, cfun)
@@ -8117,27 +8170,52 @@ ix86_find_max_used_stack_alignment (unsigned int 
_alignment,
   set_up_by_prologue))
  {
require_stack_frame = true;
-
-   if (check_stack_slot)
- {
-   /* Find the maximum stack alignment.  */
-   subrtx_iterator::array_type array;
-   FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
- if (MEM_P (*iter)
- && (reg_mentioned_p (stack_pointer_rtx,
-  *iter)
- || reg_mentioned_p (frame_pointer_rtx,
- *iter)))
-   {
- unsigned int alignment = MEM_ALIGN (*iter);
- if (alignment > stack_alignment)
-   stack_alignment = alignment;
-   }
- }
+   break;
  }
 }
 
   cfun->machine->stack_frame_required = require_stack_frame;
+
+  /* Stop if we don't need to check stack slot.  */
+  if (!check_stack_slot)
+return;
+
+  /* The preferred stack alignment is the minimum stack alignment.  */
+  if (stack_alignment > crtl->preferred_stack_boundary)
+stack_alignment = crtl->preferred_stack_boundary;
+
+  HARD_REG_SET stack_slot_access;
+  CLEAR_HARD_REG_SET (stack_slot_access);
+
+  /* Stack slot can be accessed by stack pointer, frame pointer or
+ 

Re: [PATCH] s390: Fix vec_init default expander

2023-07-07 Thread Andreas Krebbel via Gcc-patches
On 7/7/23 15:51, Juergen Christ wrote:
> Do not reinitialize vector lanes to zero since they are already initialized to
> zero.
> 
> Bootstrapped and regression tested on s390x.
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (vec_init): Fix default case
> 
> gcc/Testsuite/ChangeLog:
> 
>   * gcc.target/s390/vector/vec-init-3.c: New test.

Ok. Pushed to mainline. Thanks!

Andreas



Re: Re: [PATCH V4] RISC-V: Support gather_load/scatter RVV auto-vectorization

2023-07-07 Thread 钟居哲
Thanks. I have still sent V5, which renames "dummy" to "vlmax"
and adds more comments.

Thanks.



juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-07-07 20:34
To: Juzhe-Zhong; gcc-patches
CC: rdapp.gcc; kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw
Subject: Re: [PATCH V4] RISC-V: Support gather_load/scatter RVV 
auto-vectorization
Hi Juzhe,
 
thanks, the somewhat unified modulo is IMHO more readable.
Could probably still be improved but OK with me for now.
 
> +   if (is_dummy_len)
> + {
> +   rtx dummy_len = gen_reg_rtx (Pmode);
 
Can we call this is_vlmax_len/is_vlmax and vlmax_len or so?
 
> +  if (inner_offsize < inner_vsize)
> +{
> +  /* 7.2. Vector Load/Store Addressing Modes.
> + If the vector offset elements are narrower than XLEN, they are
> + zero-extended to XLEN before adding to the ptr effective address. If
> + the vector offset elements are wider than XLEN, the least-significant
> + XLEN bits are used in the address calculation. An implementation must
> + raise an illegal instruction exception if the EEW is not supported for
> + offset elements.  */
> +  if (!zero_extend_p || (zero_extend_p && scale_log2 != 0))
 
Hehe I really thought we have a widening shift, well ;)
 
I see, the zero extension refers to this part of the GCC docs
"multiply the extended offset by operand 4;" 
 
and not to the RVV spec.  You could clarify this then, saying
that the RVV spec only refers to the scale_log == 0 case.
 
The rest LGTM now, no separate revision needed for those nits.
 
Regards
Robin
 


Re: [PING][PATCH] tree-optimization/110279- Check for nested FMA chains in reassoc

2023-07-07 Thread Philipp Tomsich
On Fri, 7 Jul 2023 at 10:28, Di Zhao OS via Gcc-patches
 wrote:
>
> Update the patch so it can apply.
>
> Tested on spec2017 fprate cases again. With option "-funroll-loops -Ofast 
> -flto",
> the improvements of 1-copy run are:
>
> Ampere1:
> 508.namd_r  4.26%
> 510.parest_r2.55%
> Overall 0.54%
> Intel Xeon:
> 503.bwaves_r1.3%
> 508.namd_r  1.58%
> overall 0.42%

This looks like a worthwhile improvement.

From reviewing the patch, a few nit-picks:
- given that 'has_fma' can now take three values { -1, 0, 1 }, an enum
with more descriptive names for these 3 states should be used;
- using "has_fma >= 0" and "fma > 0" tests are hard to read; after
changing this to an enum, you can use macros or helper functions to
test the predicates (i.e., *_P macros or *_p helpers) for readability
- the meaning of the return values of rank_ops_for_fma should be
documented in the comment describing the function
- changing convert_mult_to_fma_1 to return a tree* (i.e., return_lhs
or NULL_TREE) removes the need for an in/out parameter

Thanks,
Philipp.

>
>
> Thanks,
> Di Zhao
>
>
> > -Original Message-
> > From: Di Zhao OS
> > Sent: Friday, June 16, 2023 4:51 PM
> > To: gcc-patches@gcc.gnu.org
> > Subject: [PATCH] tree-optimization/110279- Check for nested FMA chains in
> > reassoc
> >
> > This patch is to fix the regressions found in SPEC2017 fprate cases
> >  on aarch64.
> >
> > 1. Reused code in pass widening_mul to check for nested FMA chains
> >  (those connected by MULT_EXPRs), since re-writing to parallel
> >  generates worse codes.
> >
> > 2. Avoid re-arrange to produce less FMA chains that can be slow.
> >
> > Tested on ampere1 and neoverse-n1, this fixed the regressions in
> > 508.namd_r and 510.parest_r 1 copy run. While I'm still collecting data
> > on x86 machines we have, I'd like to know what do you think of this.
> >
> > (Previously I tried to improve things with FMA by adding a widening_mul
> > pass before reassoc2 for it's easier to recognize different patterns
> > of FMA chains and decide whether to split them. But I suppose handling
> > them all in reassoc pass is more efficient.)
> >
> > Thanks,
> > Di Zhao
> >
> > ---
> > gcc/ChangeLog:
> >
> > * tree-ssa-math-opts.cc (convert_mult_to_fma_1): Add new parameter.
> > Support new mode that merely do the checking.
> > (struct fma_transformation_info): Moved to header.
> > (class fma_deferring_state): Moved to header.
> > (convert_mult_to_fma): Add new parameter.
> > * tree-ssa-math-opts.h (struct fma_transformation_info):
> > (class fma_deferring_state): Moved from .cc.
> > (convert_mult_to_fma): Add function decl.
> > * tree-ssa-reassoc.cc (rewrite_expr_tree_parallel):
> > (rank_ops_for_fma): Return -1 if nested FMAs are found.
> > (reassociate_bb): Avoid rewriting to parallel if nested FMAs are
> > found.
>


Re: [PATCH ver 2] rs6000, __builtin_set_fpscr_rn add retrun value

2023-07-07 Thread Peter Bergner via Gcc-patches
On 7/7/23 12:08 AM, Kewen.Lin wrote:
> on 2023/7/7 07:00, Peter Bergner wrote:
>> On 7/6/23 5:54 PM, Peter Bergner wrote:
>>> On 6/30/23 7:58 PM, Carl Love via Gcc-patches wrote:
 +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_2.c
 @@ -0,0 +1,153 @@
 +/* { dg-do run { target { powerpc*-*-* } } } */
>>>
>>> powerpc*-*-* is the default for this test directory, so you can drop that,
>>> but you need to disable this test for soft-float systems, so you probably 
>>> want:
>>>
>>>   /* { dg-do run { target powerpc_fprs } } */
>>
>> We actually want something like powerpc_fprs_hw, but that doesn't exist.
>>
> 
> Yeah, good point!  I noticed that we have a few test cases which need to
> check soft-float env as well but they don't, I didn't find any related
> issues have been reported, so I would assume that there are very few
> actual testings on this area.  Based on this, I'm not sure if it's worthy
> to add a new effective target for it.  Personally I'm happy with just using
> powerpc_fprs here to keep it simple. :)

I think powerpc_fprs_hw can be added later by someone if they care.
Using powerpc_fprs is an improvement over powerpc*-*-*, since it will
reduce some FAILs, just not all of them as powerpc_fprs_hw would.
I doubt many people are running the testsuite on real ppc hardware
that doesn't have an FP unit.

Peter



Re: [PATCH 4/19]middle-end: Fix scale_loop_frequencies segfault on multiple-exits

2023-07-07 Thread Jan Hubicka via Gcc-patches
> Hi Both,
> 
> Thanks for all the reviews/patches so far 
> 
> > >
> > > Looks good, but I wonder what we can do to at least make the multiple
> > > exit case behave reasonably?  The vectorizer keeps track
> > 
> > > of a "canonical" exit, would it be possible to pass in the main exit
> > > edge and use that instead of single_exit (), would other exits then
> > > behave somewhat reasonable or would we totally screw things up here?
> > > That is, the "canonical" exit would be the counting exit while the
> > > other exits are on data driven conditions and thus wouldn't change
> > > probability when we reduce the number of iterations(?)
> > 
> > I can add canonical_exit parameter and make the function to direct flow to 
> > it if
> > possible.  However overall I think fixup depends on what transformation led 
> > to
> > the change.
> > 
> > Assuming that vectorizer did no prologues and epilogues and we vectorized
> > with factor N, then I think the update could be done more specifically as
> > follows.
> > 
> 
> If it helps, how this patch series addresses multiple exits by forcing a 
> scalar
> epilogue, all non canonical_exits would have been redirected to this scalar
> epilogue, so the remaining scalar iteration count will be at most VF.

It looks like profile update after vectorization needs quite some TLC.
My student Ondrej Kubanek also implemented loop histogram profiling
which gives better idea on how commonly prologues/epilogues are needed
and it would be also nice to handle it.
> > ;;   basic block 12, loop depth 0, count 10737416 (estimated locally), maybe
> > hot
> > ;;prev block 9, next block 13, flags: (NEW, VISITED)
> > ;;pred:   8 [50.0% (adjusted)]  count:10737418 (estimated locally)
> > (FALSE_VALUE,EXECUTABLE)
> > ;;succ:   13 [always]  count:10737416 (estimated locally) (FALLTHRU)
> > 
> > ;;   basic block 13, loop depth 1, count 1063004409 (estimated locally),
> > maybe hot
> > ;;prev block 12, next block 14, flags: (NEW, REACHABLE, VISITED)
> > ;;pred:   14 [always]  count:1052266996 (estimated locally)
> > (FALLTHRU,DFS_BACK,EXECUTABLE)
> > ;;12 [always]  count:10737416 (estimated locally) (FALLTHRU)
> >   # i_30 = PHI 
> >   # ivtmp_32 = PHI 
> >   _33 = a[i_30];
> >   _34 = _33 + 1;
> >   a[i_30] = _34;
> >   i_36 = i_30 + 1;
> >   ivtmp_37 = ivtmp_32 - 1;
> >   if (ivtmp_37 != 0)
> > goto ; [98.99%]
> >   else
> > goto ; [1.01%]

Actually it seems that the scalar epilogue loop is left with the original
profile (predicted to iterate 99 times), which is quite wrong.
Looking at the statistics for yesterday's patch, on tramp3d we got an 86%
reduction in cumulative profile mismatches after the whole optimization
pipeline.  More interestingly, however, the overall time estimate
dropped by 18%, so it seems that the profile adjustments done by cunroll
are affecting the profile a lot.

I think the fact that iteration counts of epilogues is not capped is one
of main problems.

We seem to call scale_loop_profile 3 times:

   scale_loop_profile (loop, prob_vector, -1);

This seems to account for the probability that control flow is
redirected to prolog/epilog later.  So it only scales down the profile
but is not responsible 

   scale_loop_profile (prolog, prob_prolog, bound_prolog - 1);

This does the prolog and sets the bound.

   scale_loop_profile (epilog, prob_epilog, -1);

This scales the epilog but does not set the bound at all. 
I think the information is available since we update the loop_info
data structures.

Honza


[pushed][LRA][PR110372]: Refine reload pseudo class

2023-07-07 Thread Vladimir Makarov via Gcc-patches

The following patch solves

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110372

The patch was successfully bootstrapped and tested on x86-64.
commit 1f7e5a7b91862b999aab88ee0319052aaf00f0f1
Author: Vladimir N. Makarov 
Date:   Fri Jul 7 09:53:38 2023 -0400

LRA: Refine reload pseudo class

    For the given testcase a reload pseudo happened to occur only in reload
    insns created on one constraint sub-pass.  Therefore its initial class
    (ALL_REGS) was not refined and the reload insns were not processed on
    the next constraint sub-passes.  This resulted in the wrong insn.

PR rtl-optimization/110372

gcc/ChangeLog:

* lra-assigns.cc (assign_by_spills): Add reload insns involving
reload pseudos with non-refined class to be processed on the next
sub-pass.
* lra-constraints.cc (enough_allocatable_hard_regs_p): New func.
(in_class_p): Use it.
(print_curr_insn_alt): New func.
(process_alt_operands): Use it.  Improve debug info.
(curr_insn_transform): Use print_curr_insn_alt.  Refine reload
pseudo class if it is not refined yet.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr110372.c: New.

diff --git a/gcc/lra-assigns.cc b/gcc/lra-assigns.cc
index 73fbef29912..2f95121df06 100644
--- a/gcc/lra-assigns.cc
+++ b/gcc/lra-assigns.cc
@@ -1443,10 +1443,11 @@ assign_by_spills (void)
 		 pass.  Indicate that it is no longer spilled.  */
 	  bitmap_clear_bit (_spilled_pseudos, regno);
 	  assign_hard_regno (hard_regno, regno);
-	  if (! reload_p)
-		/* As non-reload pseudo assignment is changed we
-		   should reconsider insns referring for the
-		   pseudo.  */
+	  if (! reload_p || regno_allocno_class_array[regno] == ALL_REGS)
+		/* As non-reload pseudo assignment is changed we should
+		   reconsider insns referring for the pseudo.  Do the same if a
+		   reload pseudo did not refine its class which can happens
+		   when the pseudo occurs only in reload insns.  */
 		bitmap_set_bit (_pseudo_bitmap, regno);
 	}
 	}
diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 4dc2d70c402..123ff662cbc 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -233,6 +233,34 @@ get_reg_class (int regno)
   return NO_REGS;
 }
 
+/* Return true if REG_CLASS has enough allocatable hard regs to keep value of
+   REG_MODE.  */
+static bool
+enough_allocatable_hard_regs_p (enum reg_class reg_class,
+enum machine_mode reg_mode)
+{
+  int i, j, hard_regno, class_size, nregs;
+  
+  if (hard_reg_set_subset_p (reg_class_contents[reg_class], lra_no_alloc_regs))
+return false;
+  class_size = ira_class_hard_regs_num[reg_class];
+  for (i = 0; i < class_size; i++)
+{
+  hard_regno = ira_class_hard_regs[reg_class][i];
+  nregs = hard_regno_nregs (hard_regno, reg_mode);
+  if (nregs == 1)
+	return true;
+  for (j = 0; j < nregs; j++)
+	if (TEST_HARD_REG_BIT (lra_no_alloc_regs, hard_regno + j)
+	|| ! TEST_HARD_REG_BIT (reg_class_contents[reg_class],
+hard_regno + j))
+	  break;
+  if (j >= nregs)
+	return true;
+}
+  return false;
+}
+
 /* Return true if REG satisfies (or will satisfy) reg class constraint
CL.  Use elimination first if REG is a hard register.  If REG is a
reload pseudo created by this constraints pass, assume that it will
@@ -252,7 +280,6 @@ in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class,
   enum reg_class rclass, common_class;
   machine_mode reg_mode;
   rtx src;
-  int class_size, hard_regno, nregs, i, j;
   int regno = REGNO (reg);
 
   if (new_class != NULL)
@@ -291,26 +318,7 @@ in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class,
   common_class = ira_reg_class_subset[rclass][cl];
   if (new_class != NULL)
 	*new_class = common_class;
-  if (hard_reg_set_subset_p (reg_class_contents[common_class],
- lra_no_alloc_regs))
-	return false;
-  /* Check that there are enough allocatable regs.  */
-  class_size = ira_class_hard_regs_num[common_class];
-  for (i = 0; i < class_size; i++)
-	{
-	  hard_regno = ira_class_hard_regs[common_class][i];
-	  nregs = hard_regno_nregs (hard_regno, reg_mode);
-	  if (nregs == 1)
-	return true;
-	  for (j = 0; j < nregs; j++)
-	if (TEST_HARD_REG_BIT (lra_no_alloc_regs, hard_regno + j)
-		|| ! TEST_HARD_REG_BIT (reg_class_contents[common_class],
-	hard_regno + j))
-	  break;
-	  if (j >= nregs)
-	return true;
-	}
-  return false;
+  return enough_allocatable_hard_regs_p (common_class, reg_mode);
 }
 }
 
@@ -2046,6 +2054,23 @@ update_and_check_small_class_inputs (int nop, int nalt,
   return false;
 }
 
+/* Print operand constraints for alternative ALT_NUMBER of the current
+   insn.  */
+static void
+print_curr_insn_alt (int alt_number)
+{
+  for (int i = 0; i < curr_static_id->n_operands; i++)
+{
+  const char *p = 

[PATCH] s390: Fix vec_init default expander

2023-07-07 Thread Juergen Christ via Gcc-patches
Do not reinitialize vector lanes to zero since they are already initialized to
zero.

Bootstrapped and regression tested on s390x.

gcc/ChangeLog:

* config/s390/s390.cc (vec_init): Fix default case

gcc/testsuite/ChangeLog:

* gcc.target/s390/vector/vec-init-3.c: New test.

Signed-off-by: Juergen Christ 
---
 gcc/config/s390/s390.cc | 11 ++-
 .../gcc.target/s390/vector/vec-init-3.c | 17 +
 2 files changed, 23 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-init-3.c

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 505de995da87..31b646782721 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -7130,11 +7130,12 @@ s390_expand_vec_init (rtx target, rtx vals)
   if (!general_operand (elem, GET_MODE (elem)))
elem = force_reg (inner_mode, elem);
 
-  emit_insn (gen_rtx_SET (target,
- gen_rtx_UNSPEC (mode,
- gen_rtvec (3, elem,
-GEN_INT (i), target),
- UNSPEC_VEC_SET)));
+  if (elem != const0_rtx)
+   emit_insn (gen_rtx_SET (target,
+   gen_rtx_UNSPEC (mode,
+   gen_rtvec (3, elem,
+  GEN_INT (i), target),
+   UNSPEC_VEC_SET)));
 }
 }
 
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-init-3.c 
b/gcc/testsuite/gcc.target/s390/vector/vec-init-3.c
new file mode 100644
index ..12008a963ffb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-init-3.c
@@ -0,0 +1,17 @@
+/* Check that the default case of the vec_init expander does its job.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+typedef __attribute__((vector_size(16))) signed int v4si;
+
+extern v4si G;
+
+v4si
+n (signed int a)
+{
+  return G == (v4si){ a };
+}
+/* { dg-final { scan-assembler-times "vzero" 1 } } */
+/* { dg-final { scan-assembler-times "vlvgf\t" 1 } } */
+/* { dg-final { scan-assembler-not "vleif\t" } } */
-- 
2.39.3



[PATCH v2] vect: Fix vectorized BIT_FIELD_REF for signed bit-fields [PR110557]

2023-07-07 Thread Xi Ruoyao via Gcc-patches
If a bit-field is signed and it's wider than the output type, we must
ensure the extracted result is sign-extended.  But this was not handled
correctly.

For example:

int x : 8;
long y : 55;
bool z : 1;

The vectorized extraction of y was:

vect__ifc__49.29_110 =
  MEM  [(struct Item *)vectp_a.27_108];
vect_patt_38.30_112 =
  vect__ifc__49.29_110 & { 9223372036854775552, 9223372036854775552 };
vect_patt_39.31_113 = vect_patt_38.30_112 >> 8;
vect_patt_40.32_114 =
  VIEW_CONVERT_EXPR(vect_patt_39.31_113);

This is obviously incorrect.  This patch implements it as:

vect__ifc__25.16_62 =
  MEM  [(struct Item *)vectp_a.14_60];
vect_patt_31.17_63 =
  VIEW_CONVERT_EXPR(vect__ifc__25.16_62);
vect_patt_32.18_64 = vect_patt_31.17_63 << 1;
vect_patt_33.19_65 = vect_patt_32.18_64 >> 9;

gcc/ChangeLog:

PR tree-optimization/110557
* tree-vect-patterns.cc (vect_recog_bitfield_ref_pattern):
Ensure the output sign-extended if necessary.

gcc/testsuite/ChangeLog:

PR tree-optimization/110557
* g++.dg/vect/pr110557.cc: New test.
---

Change v1 -> v2:

- Rename two variables for readability.
- Remove a redundant useless_type_conversion_p check.
- Edit the comment for early conversion to show the rationale of
  "|| ref_sext".

Bootstrapped (with BOOT_CFLAGS="-O3 -mavx2") and regtested on
x86_64-linux-gnu.  Ok for trunk and gcc-13?

 gcc/testsuite/g++.dg/vect/pr110557.cc | 37 
 gcc/tree-vect-patterns.cc | 62 ---
 2 files changed, 83 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/vect/pr110557.cc

diff --git a/gcc/testsuite/g++.dg/vect/pr110557.cc 
b/gcc/testsuite/g++.dg/vect/pr110557.cc
new file mode 100644
index 000..e1fbe1caac4
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr110557.cc
@@ -0,0 +1,37 @@
+// { dg-additional-options "-mavx" { target { avx_runtime } } }
+
+static inline long
+min (long a, long b)
+{
+  return a < b ? a : b;
+}
+
+struct Item
+{
+  int x : 8;
+  long y : 55;
+  bool z : 1;
+};
+
+__attribute__ ((noipa)) long
+test (Item *a, int cnt)
+{
+  long size = 0;
+  for (int i = 0; i < cnt; i++)
+size = min ((long)a[i].y, size);
+  return size;
+}
+
+int
+main ()
+{
+  struct Item items[] = {
+{ 1, -1 },
+{ 2, -2 },
+{ 3, -3 },
+{ 4, -4 },
+  };
+
+  if (test (items, 4) != -4)
+__builtin_trap ();
+}
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 1bc36b043a0..c0832e8679f 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -2566,7 +2566,7 @@ vect_recog_widen_sum_pattern (vec_info *vinfo,
Widening with mask first, shift later:
container = (type_out) container;
masked = container & (((1 << bitsize) - 1) << bitpos);
-   result = patt2 >> masked;
+   result = masked >> bitpos;
 
Widening with shift first, mask last:
container = (type_out) container;
@@ -2578,6 +2578,15 @@ vect_recog_widen_sum_pattern (vec_info *vinfo,
result = masked >> bitpos;
result = (type_out) result;
 
+   If the bitfield is signed and it's wider than type_out, we need to
+   keep the result sign-extended:
+   container = (type) container;
+   masked = container << (prec - bitsize - bitpos);
+   result = (type_out) (masked >> (prec - bitsize));
+
+   Here type is the signed variant of the wider of type_out and the type
+   of container.
+
The shifting is always optional depending on whether bitpos != 0.
 
 */
@@ -2636,14 +2645,22 @@ vect_recog_bitfield_ref_pattern (vec_info *vinfo, 
stmt_vec_info stmt_info,
   if (BYTES_BIG_ENDIAN)
 shift_n = prec - shift_n - mask_width;
 
+  bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref)) &&
+  TYPE_PRECISION (ret_type) > mask_width);
+  bool load_widen = (TYPE_PRECISION (TREE_TYPE (container)) <
+TYPE_PRECISION (ret_type));
+
   /* We move the conversion earlier if the loaded type is smaller than the
- return type to enable the use of widening loads.  */
-  if (TYPE_PRECISION (TREE_TYPE (container)) < TYPE_PRECISION (ret_type)
-  && !useless_type_conversion_p (TREE_TYPE (container), ret_type))
-{
-  pattern_stmt
-   = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
-  NOP_EXPR, container);
+ return type to enable the use of widening loads.  And if we need a
+ sign extension, we need to convert the loaded value early to a signed
+ type as well.  */
+  if (ref_sext || load_widen)
+{
+  tree type = load_widen ? ret_type : container_type;
+  if (ref_sext)
+   type = gimple_signed_type (type);
+  pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
+ NOP_EXPR, container);
   container = gimple_get_lhs (pattern_stmt);
   container_type = TREE_TYPE (container);
   prec = tree_to_uhwi (TYPE_SIZE (container_type));
@@ -2671,7 

Re: [PATCH V4] RISC-V: Support gather_load/scatter RVV auto-vectorization

2023-07-07 Thread Robin Dapp via Gcc-patches
Hi Juzhe,

thanks, the somewhat unified modulo is IMHO more readable.
Could probably still be improved but OK with me for now.

> +   if (is_dummy_len)
> + {
> +   rtx dummy_len = gen_reg_rtx (Pmode);

Can we call this is_vlmax_len/is_vlmax and vlmax_len or so?

> +  if (inner_offsize < inner_vsize)
> +{
> +  /* 7.2. Vector Load/Store Addressing Modes.
> +  If the vector offset elements are narrower than XLEN, they are
> +  zero-extended to XLEN before adding to the ptr effective address. If
> +  the vector offset elements are wider than XLEN, the least-significant
> +  XLEN bits are used in the address calculation. An implementation must
> +  raise an illegal instruction exception if the EEW is not supported for
> +  offset elements.  */
> +  if (!zero_extend_p || (zero_extend_p && scale_log2 != 0))

Hehe I really thought we have a widening shift, well ;)

I see, the zero extension refers to this part of the GCC docs
 "multiply the extended offset by operand 4;" 

and not to the RVV spec.  You could clarify this then, saying
that the RVV spec only refers to the scale_log2 == 0 case.

The rest LGTM now, no separate revision needed for those nits.

Regards
 Robin


Re: [PATCH] Fortran: fixes for procedures with ALLOCATABLE,INTENT(OUT) arguments [PR92178]

2023-07-07 Thread Mikael Morin

Le 05/07/2023 à 22:36, Harald Anlauf a écrit :

Hi Mikael,

Am 05.07.23 um 16:54 schrieb Mikael Morin:

Here is an example, admittedly artificial.  Fails with the above change,
but fails with master as well.

program p
   implicit none
   type t
 integer :: i
   end type t
   type u
 class(t), allocatable :: ta(:)
   end type u
   type(u), allocatable, target :: c(:)
   c = [u([t(1), t(3)]), u([t(4), t(9)])]
   call bar (allocated (c(c(1)%ta(1)%i)%ta), c(c(1)%ta(1)%i)%ta,
allocated (c(c(1)%ta(1)%i)%ta))
   if (allocated(c(1)%ta)) stop 11
   if (.not. allocated(c(2)%ta)) stop 12
contains
   subroutine bar (alloc, x, alloc2)
 logical :: alloc, alloc2
 class(t), allocatable, intent(out) :: x(:)
 if (allocated (x)) stop 1
 if (.not. alloc)   stop 2
 if (.not. alloc2)  stop 3
   end subroutine bar
end


while it looks artificial, it is valid, and IMHO it is a beast...

I've played around and added another argument gfc_se *convse to
gfc_conv_class_to_class in an attempt to implement what I thought
you suggested (to get the .pre/.post separately), but in the end
this did not lead to working code.  And the tree-dump for your
example above is beyond what I can grasp.

I've noticed that my attempt does not properly handle the
parmse.post; at least this is what the above example shows:
there is a small part after the call to bar that should have
been executed before that call, which I attribute to .post.
But my attempts at moving that part regress on a couple
of testcases with class and intent(out).  I am at a loss now.

All that I can see after the call is a reassignment of the original data 
and vptr pointers from the temporary class container.  They seem at 
their right place there. But part of the expression seems to be 
evaluated again, instead of being picked up from parmse.expr.



I am attaching the latest version of my patch to give you or
Paul or others the opportunity to see what is wrong or add the
missing pieces.


I'm attaching what I have (lightly) tested so far, which doesn't work.
It seems gfc_conv_class_to_class reevaluates part of the original 
expression, which is not correct after deallocation.

Will have a look again tonight.

Mikael

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index ebef1a36577..54249c9c615 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -6819,9 +6819,13 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym,
 		  defer_to_dealloc_blk = true;
 		}
 
+	  gfc_se class_se = parmse;
+	  gfc_init_block (_se.pre);
+	  gfc_init_block (_se.post);
+
 	  /* The conversion does not repackage the reference to a class
 	 array - _data descriptor.  */
-	  gfc_conv_class_to_class (, e, fsym->ts, false,
+	  gfc_conv_class_to_class (_se, e, fsym->ts, false,
  fsym->attr.intent != INTENT_IN
  && (CLASS_DATA (fsym)->attr.class_pointer
 	 || CLASS_DATA (fsym)->attr.allocatable),
@@ -6831,9 +6835,10 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym,
  CLASS_DATA (fsym)->attr.class_pointer
  || CLASS_DATA (fsym)->attr.allocatable);
 
-	  /* Defer repackaging after deallocation.  */
-	  if (defer_to_dealloc_blk)
-		gfc_add_block_to_block (_blk, );
+	  parmse.expr = class_se.expr;
+	  stmtblock_t *class_pre_block = defer_to_dealloc_blk ? _blk : 
+	  gfc_add_block_to_block (class_pre_block, _se.pre);
+	  gfc_add_block_to_block (, _se.post);
 	}
 	  else
 	{


Re: [PATCH 4/19]middle-end: Fix scale_loop_frequencies segfault on multiple-exits

2023-07-07 Thread Jan Hubicka via Gcc-patches
> 
> Looks good, but I wonder what we can do to at least make the
> multiple exit case behave reasonably?  The vectorizer keeps track

> of a "canonical" exit, would it be possible to pass in the main
> exit edge and use that instead of single_exit (), would other
> exits then behave somewhat reasonable or would we totally screw
> things up here?  That is, the "canonical" exit would be the
> counting exit while the other exits are on data driven conditions
> and thus wouldn't change probability when we reduce the number
> of iterations(?)

I can add canonical_exit parameter and make the function to direct flow
to it if possible.  However overall I think fixup depends on what
transformation led to the change.

Assuming that the vectorizer did no prologues and epilogues and we
vectorized with factor N, then I think the update could be done more
specifically as follows.

We know that header block count dropped by 4. So we can start from that
and each time we reach basic block with exit edge, we know the original
count of the edge.  This count is unchanged, so one can rescale
probabilities out of that BB accordingly.  If the loop has no inner loops,
we can just walk the body in RPO and propagate scales downwards and we
should arrive at the right result.

I originally added the bound parameter to handle prologues/epilogues
which gets new artificial bound.  In prologue I think you are right that
the flow will be probably directed to the conditional counting
iterations.

In epilogue we add no artificial iteration cap, so maybe it is more
realistic to simply scale up probability of all exits?

To see what is going on I tried following testcase:

int a[99];
test()
{
  for (int i = 0; i < 99; i++)
  a[i]++;
}

What surprises me is that vectorizer at -O2 does nothing and we end up
unrolling the loop:

L2:
addl$1, (%rax)
addl$1, 4(%rax)
addl$1, 8(%rax)
addq$12, %rax
cmpq$a+396, %rax

Which seems a silly thing to do. A vectorized loop with an epilogue doing 2 and
1 addition would be better.

With -O3 we vectorize it:


.L2:
movdqa  (%rax), %xmm0
addq$16, %rax
paddd   %xmm1, %xmm0
movaps  %xmm0, -16(%rax)
cmpq%rax, %rdx
jne .L2
movqa+384(%rip), %xmm0
addl$1, a+392(%rip)
movq.LC1(%rip), %xmm1
paddd   %xmm1, %xmm0
movq%xmm0, a+384(%rip)


and correctly drop vectorized loop body to 24 iterations. However the
epilogue has loop for vector size 2 predicted to iterate once (it won't)

;;   basic block 7, loop depth 0, count 10737416 (estimated locally), maybe hot 
;;prev block 5, next block 8, flags: (NEW, VISITED) 
;;pred:   3 [4.0% (adjusted)]  count:10737416 (estimated locally) 
(FALSE_VALUE,EXECUTABLE)
;;succ:   8 [always]  count:10737416 (estimated locally) 
(FALLTHRU,EXECUTABLE)

;;   basic block 8, loop depth 1, count 21474835 (estimated locally), maybe hot 
;;prev block 7, next block 9, flags: (NEW, REACHABLE, VISITED)  
;;pred:   9 [always]  count:10737417 (estimated locally) 
(FALLTHRU,DFS_BACK,EXECUTABLE)
;;7 [always]  count:10737416 (estimated locally) 
(FALLTHRU,EXECUTABLE)
  # i_9 = PHI   
  # ivtmp_13 = PHI   
  # vectp_a.14_40 = PHI  [(void *) + 
384B](7)>
  # vectp_a.18_46 = PHI  [(void *) + 
384B](7)>
  # ivtmp_49 = PHI   
  vect__14.16_42 = MEM  [(int *)vectp_a.14_40];  
  _14 = a[i_9]; 
  vect__15.17_44 = vect__14.16_42 + { 1, 1 };   
  _15 = _14 + 1;
  MEM  [(int *)vectp_a.18_46] = vect__15.17_44;  
  i_17 = i_9 + 1;   
  ivtmp_18 = ivtmp_13 - 1;  
  vectp_a.14_41 = vectp_a.14_40 + 8;
  vectp_a.18_47 = vectp_a.18_46 + 8;
  ivtmp_50 = ivtmp_49 + 1;  
  if (ivtmp_50 < 1) 
goto ; [50.00%]   
  else  
goto ; [50.00%]  

and finally the scalar copy

;;   basic block 12, loop depth 0, count 10737416 (estimated locally), maybe hot
;;prev block 9, next block 13, flags: (NEW, VISITED)
;;pred:   8 [50.0% (adjusted)]  count:10737418 (estimated locally) 

RE: [PATCH 4/19]middle-end: Fix scale_loop_frequencies segfault on multiple-exits

2023-07-07 Thread Tamar Christina via Gcc-patches
Hi Both,

Thanks for all the reviews/patches so far 

> >
> > Looks good, but I wonder what we can do to at least make the multiple
> > exit case behave reasonably?  The vectorizer keeps track
> 
> > of a "canonical" exit, would it be possible to pass in the main exit
> > edge and use that instead of single_exit (), would other exits then
> > behave somewhat reasonable or would we totally screw things up here?
> > That is, the "canonical" exit would be the counting exit while the
> > other exits are on data driven conditions and thus wouldn't change
> > probability when we reduce the number of iterations(?)
> 
> I can add canonical_exit parameter and make the function to direct flow to it 
> if
> possible.  However overall I think fixup depends on what transformation led to
> the change.
> 
> Assuming that the vectorizer did no prologues and epilogues and we vectorized
> with factor N, then I think the update could be done more specifically as
> follows.
> 

If it helps, this patch series addresses multiple exits by forcing a scalar
epilogue: all non-canonical exits would have been redirected to this scalar
epilogue, so the remaining scalar iteration count will be at most VF.

Regards,
Tamar

> We know that header block count dropped by 4. So we can start from that
> and each time we reach basic block with exit edge, we know the original count
> of the edge.  This count is unchanged, so one can rescale probabilities out of
> that BB accordingly.  If the loop has no inner loops, we can just walk the body in
> RPO and propagate scales downwards and we should arrive at the right result.
> 
> I originally added the bound parameter to handle prologues/epilogues which
> gets new artificial bound.  In prologue I think you are right that the flow 
> will be
> probably directed to the conditional counting iterations.
> 
> In epilogue we add no artificial iteration cap, so maybe it is more realistic 
> to
> simply scale up probability of all exits?
> 
> To see what is going on I tried following testcase:
> 
> int a[99];
> test()
> {
>   for (int i = 0; i < 99; i++)
>   a[i]++;
> }
> 
> What surprises me is that vectorizer at -O2 does nothing and we end up
> unrolling the loop:
> 
> L2:
> addl$1, (%rax)
> addl$1, 4(%rax)
> addl$1, 8(%rax)
> addq$12, %rax
> cmpq$a+396, %rax
> 
> Which seems a silly thing to do. A vectorized loop with an epilogue doing 2 and
> 1 addition would be better.
> 
> With -O3 we vectorize it:
> 
> 
> .L2:
> movdqa  (%rax), %xmm0
> addq$16, %rax
> paddd   %xmm1, %xmm0
> movaps  %xmm0, -16(%rax)
> cmpq%rax, %rdx
> jne .L2
> movqa+384(%rip), %xmm0
> addl$1, a+392(%rip)
> movq.LC1(%rip), %xmm1
> paddd   %xmm1, %xmm0
> movq%xmm0, a+384(%rip)
> 
> 
> and correctly drop vectorized loop body to 24 iterations. However the
> epilogue has loop for vector size 2 predicted to iterate once (it won't)
> 
> ;;   basic block 7, loop depth 0, count 10737416 (estimated locally), maybe
> hot
> ;;prev block 5, next block 8, flags: (NEW, VISITED)
> ;;pred:   3 [4.0% (adjusted)]  count:10737416 (estimated locally)
> (FALSE_VALUE,EXECUTABLE)
> ;;succ:   8 [always]  count:10737416 (estimated locally)
> (FALLTHRU,EXECUTABLE)
> 
> ;;   basic block 8, loop depth 1, count 21474835 (estimated locally), maybe
> hot
> ;;prev block 7, next block 9, flags: (NEW, REACHABLE, VISITED)
> ;;pred:   9 [always]  count:10737417 (estimated locally)
> (FALLTHRU,DFS_BACK,EXECUTABLE)
> ;;7 [always]  count:10737416 (estimated locally)
> (FALLTHRU,EXECUTABLE)
>   # i_9 = PHI 
>   # ivtmp_13 = PHI 
>   # vectp_a.14_40 = PHI  [(void *) +
> 384B](7)>
>   # vectp_a.18_46 = PHI  [(void *) +
> 384B](7)>
>   # ivtmp_49 = PHI 
>   vect__14.16_42 = MEM  [(int *)vectp_a.14_40];
>   _14 = a[i_9];
>   vect__15.17_44 = vect__14.16_42 + { 1, 1 };
>   _15 = _14 + 1;
>   MEM  [(int *)vectp_a.18_46] = vect__15.17_44;
>   i_17 = i_9 + 1;
>   ivtmp_18 = ivtmp_13 - 1;
>   vectp_a.14_41 = vectp_a.14_40 + 8;
>   vectp_a.18_47 = vectp_a.18_46 + 8;
>   ivtmp_50 = ivtmp_49 + 1;
>   if (ivtmp_50 < 1)
> goto ; [50.00%]
>   else
> goto ; [50.00%]
> 
> and finally the scalar copy
> 
> ;;   basic block 12, loop depth 0, count 10737416 (estimated locally), maybe
> hot
> ;;prev block 9, next block 13, flags: (NEW, VISITED)
> ;;pred:   8 [50.0% (adjusted)]  count:10737418 (estimated locally)
> (FALSE_VALUE,EXECUTABLE)
> ;;succ:   13 [always]  count:10737416 (estimated locally) (FALLTHRU)
> 
> ;;   basic block 13, loop depth 1, count 1063004409 (estimated locally),
> maybe hot
> ;;prev block 12, next block 14, flags: (NEW, REACHABLE, VISITED)
> ;;pred:   14 [always]  count:1052266996 (estimated locally)
> (FALLTHRU,DFS_BACK,EXECUTABLE)
> ;;12 [always]  count:10737416 (estimated locally) (FALLTHRU)
>   # i_30 = 

Gcc - CIFF FASHION FAIR 2023

2023-07-07 Thread Emma Davis
Hi,
Hope this email finds you well,
Utilize our high-quality Show name attendees contact database to help you reach 
your target audience. Our database is segmented and ready to help you reach 
your goals.
We have compiled and identified 1439 contacts of visitors from CIFF FASHION 
FAIR 2023, if interested I can share the pricing and guarantees.
Looking forward for your response.
Many thanks,
Emma Davis - Senior Marketing Executive
If you do not wish to receive these messages, reply back with remove and we 
will make sure you don't receive any more emails from our end.



Re: [PATCH] gcc-ar: Handle response files properly [PR77576]

2023-07-07 Thread Costas Argyris via Gcc-patches
Bootstrapped successfully on x86_64-pc-linux-gnu

On Fri, 7 Jul 2023 at 11:33, Costas Argyris 
wrote:

> Problem: gcc-ar fails when a @file is passed to it:
>
> $ cat rsp
> --version
> $ gcc-ar @rsp
> /usr/bin/ar: invalid option -- '@'
>
> This is because a dash '-' is prepended to the first
> argument if it doesn't start with one, resulting in
> the wrong call 'ar -@rsp'.
>
> Fix: Expand argv to get rid of any @files and if any
> expansions were made, pass everything through a
> temporary response file.
>
> $ gcc-ar @rsp
> GNU ar (GNU Binutils for Debian) 2.35.2
> ...
>
>
> PR gcc-ar/77576
> * gcc/gcc-ar.cc (main): Expand argv and use
> temporary response file to call ar if any
> expansions were made.
> ---
>  gcc/gcc-ar.cc | 47 +++
>  1 file changed, 47 insertions(+)
>
> diff --git a/gcc/gcc-ar.cc b/gcc/gcc-ar.cc
> index 4e4c525927d..417c4913793 100644
> --- a/gcc/gcc-ar.cc
> +++ b/gcc/gcc-ar.cc
> @@ -135,6 +135,10 @@ main (int ac, char **av)
>int k, status, err;
>const char *err_msg;
>const char **nargv;
> +  char **old_argv;
> +  const char *rsp_file = NULL;
> +  const char *rsp_arg = NULL;
> +  const char *rsp_argv[3];
>bool is_ar = !strcmp (PERSONALITY, "ar");
>int exit_code = FATAL_EXIT_CODE;
>int i;
> @@ -209,6 +213,13 @@ main (int ac, char **av)
>   }
>  }
>
> +  /* Expand any @files before modifying the command line
> + and use a temporary response file if there were any.  */
> +  old_argv = av;
> +  expandargv (, );
> +  if (av != old_argv)
> +rsp_file = make_temp_file ("");
> +
>/* Prepend - if necessary.  */
>if (is_ar && av[1] && av[1][0] != '-')
>  av[1] = concat ("-", av[1], NULL);
> @@ -225,6 +236,39 @@ main (int ac, char **av)
>  nargv[j + k] = av[k];
>nargv[j + k] = NULL;
>
> +  /* If @file was passed, put nargv into the temporary response
> + file and then change it to a single @FILE argument, where
> + FILE is the temporary filename.  */
> +  if (rsp_file)
> +{
> +  FILE *f;
> +  int status;
> +  f = fopen (rsp_file, "w");
> +  if (f == NULL)
> +{
> +  fprintf (stderr, "Cannot open temporary file %s\n", rsp_file);
> +  exit (1);
> +}
> +  status = writeargv (
> +  CONST_CAST2 (char * const *, const char **, nargv) + 1, f);
> +  if (status)
> +{
> +  fprintf (stderr, "Cannot write to temporary file %s\n",
> rsp_file);
> +  exit (1);
> +}
> +  status = fclose (f);
> +  if (EOF == status)
> +{
> +  fprintf (stderr, "Cannot close temporary file %s\n", rsp_file);
> +  exit (1);
> +}
> +  rsp_arg = concat ("@", rsp_file, NULL);
> +  rsp_argv[0] = nargv[0];
> +  rsp_argv[1] = rsp_arg;
> +  rsp_argv[2] = NULL;
> +  nargv = rsp_argv;
> +}
> +
>/* Run utility */
>/* ??? the const is misplaced in pex_one's argv? */
>err_msg = pex_one (PEX_LAST|PEX_SEARCH,
> @@ -249,5 +293,8 @@ main (int ac, char **av)
>else
>  exit_code = SUCCESS_EXIT_CODE;
>
> +  if (rsp_file)
> +unlink (rsp_file);
> +
>return exit_code;
>  }
> --
> 2.30.2
>


Re: [PATCH v4 4/9] MIPS: Add bitwise instructions for mips16e2

2023-07-07 Thread Jan-Benedict Glaw
On Fri, 2023-07-07 16:38:51 +0800, 梅杰  wrote:
> 在 2023/7/7 07:06, Jan-Benedict Glaw 写道:
> > On Mon, 2023-06-19 16:29:53 +0800, Jie Mei  
> > wrote:
> > > There are shortened bitwise instructions in the mips16e2 ASE,
> > > for instance, ANDI, ORI/XORI, EXT, INS etc. .
[...]
> > Starting with this patch, I see some new warning:
> > 
> > [all 2023-07-06 23:04:01] g++ -c   -g -O2   -DIN_GCC 
> > -DCROSS_DIRECTORY_STRUCTURE   -fno-exceptions -fno-rtti 
> > -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings 
> > -Wcast-qual -Wmissing-format-attribute -Wconditionally-supported 
> > -Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros 
> > -Wno-overlength-strings -fno-common  -DHAVE_CONFIG_H  -DGENERATOR_FILE -I. 
> > -Ibuild -I../../gcc/gcc -I../../gcc/gcc/build -I../../gcc/gcc/../include  
> > -I../../gcc/gcc/../libcpp/include  \
> > [all 2023-07-06 23:04:01]  -o build/gencondmd.o build/gencondmd.cc
> > [all 2023-07-06 23:04:02] ../../gcc/gcc/config/mips/mips-msa.md:435:26: 
> > warning: 'and' of mutually exclusive equal-tests is always 0
> > [all 2023-07-06 23:04:02]   435 |   DONE;
> > [all 2023-07-06 23:04:02] ../../gcc/gcc/config/mips/mips-msa.md:435:26: 
> > warning: 'and' of mutually exclusive equal-tests is always 0
> > [all 2023-07-06 23:04:03] ../../gcc/gcc/config/mips/mips.md:822:1: warning: 
> > 'and' of mutually exclusive equal-tests is always 0
> > [all 2023-07-06 23:04:03]   822 | ;; conditional-move-type condition is 
> > needed.
> > [all 2023-07-06 23:04:03]   | ^
> > [all 2023-07-06 23:04:03] g++   -g -O2   -DIN_GCC 
> > -DCROSS_DIRECTORY_STRUCTURE   -fno-exceptions -fno-rtti 
> > -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings 
> > -Wcast-qual -Wmissing-format-attribute -Wconditionally-supported 
> > -Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros 
> > -Wno-overlength-strings -fno-common  -DHAVE_CONFIG_H  -DGENERATOR_FILE 
> > -static-libstdc++ -static-libgcc  -o build/gencondmd \
> > [all 2023-07-06 23:04:03] build/gencondmd.o build/errors.o 
> > ../build-x86_64-pc-linux-gnu/libiberty/libiberty.a
> > [all 2023-07-06 23:04:03] build/gencondmd > tmp-cond.md
> > 
> > 
> > (Full build log available as eg. 
> > http://toolchain.lug-owl.de/laminar/jobs/gcc-mips-linux/76)
> 
> The warning you mentioned above seems gone if I change the condition 
> `ISA_HAS_MIPS16E2` to `TARGET_MIPS16 && ISA_HAS_MIPS16E2` in mips.md.
> 
> But it's weird because `ISA_HAS_MIPS16E2` actually contains `TARGET_MIPS16`.
> 
> diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
> index b9eb541cf4a..77165778067 100644
> --- a/gcc/config/mips/mips.md
> +++ b/gcc/config/mips/mips.md
> @@ -3436,7 +3436,7 @@
>[(set (match_operand:GPR 0 "register_operand" "=d,d")
> (ior:GPR (match_operand:GPR 1 "register_operand" "%0,0")
>  (match_operand:GPR 2 "uns_arith_operand" "d,K")))]
> -  "ISA_HAS_MIPS16E2"
> +  "TARGET_MIPS16 && ISA_HAS_MIPS16E2"
>"@
> or\t%0,%2
> ori\t%0,%x2"

That's odd. Have you looked at preprocessed output? Maybe some
parentheses aren't as expected? Looks like an opportunity to deep-dive
into the unexpected. OTOH what's with the mips-msa.md warnings? The
pattern actually wants !TARGET_MIPS16. (But I haven't really looked
into the dependencies.)

MfG, JBG

-- 


signature.asc
Description: PGP signature


Re: Re: [PATCH V3] RISC-V: Support gather_load/scatter RVV auto-vectorization

2023-07-07 Thread juzhe.zh...@rivai.ai
Hi, Robin.

I have fixed all issues for you with V4 patch:
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/623856.html 

1. Apply your approach on poly int
2. Fix comments.
3. Normalize the "modulo" code and remove the redundancy.
...

Could you take a look at it?

Thanks.


juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-07-07 17:43
To: Juzhe-Zhong; gcc-patches
CC: rdapp.gcc; kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw
Subject: Re: [PATCH V3] RISC-V: Support gather_load/scatter RVV 
auto-vectorization
Hi Juzhe,
 
thanks, that's quite a chunk :) and it took me a while to
go through it.
 
> @@ -564,7 +565,14 @@ const_vec_all_in_range_p (rtx vec, poly_int64 minval, 
> poly_int64 maxval)
>  static rtx
>  gen_const_vector_dup (machine_mode mode, poly_int64 val)
>  {
> -  rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
> +  scalar_mode smode = GET_MODE_INNER (mode);
> +  rtx c = gen_int_mode (val, smode);
> +  if (!val.is_constant () && GET_MODE_SIZE (smode) > GET_MODE_SIZE (Pmode))
> +{
> +  rtx dup = gen_reg_rtx (mode);
> +  emit_insn (gen_vec_duplicate (mode, dup, c));
> +  return dup;
> +}
>return gen_const_vec_duplicate (mode, c);
>  }
 
It's a bit weird that the function now also emits an insn.  It's not
similar to the aarch64 variant anymore then, I suppose.  If so, please
remove the comment.
 
> +
>  /* This function emits a masked instruction.  */
>  void
>  emit_vlmax_masked_mu_insn (unsigned icode, int op_num, rtx *ops)
> @@ -1162,7 +1203,6 @@ expand_const_vector (rtx target, rtx src)
>  }
>else
>  {
> -   elt = force_reg (elt_mode, elt);
>rtx ops[] = {tmp, elt};
>emit_vlmax_insn (code_for_pred_broadcast (mode), RVV_UNOP, ops);
>  }
> @@ -2431,6 +2471,25 @@ expand_vec_cmp_float (rtx target, rtx_code code, rtx 
> op0, rtx op1,
>return false;
>  }
 
elt_mode is unused after your patch.  Please remove it or we will have
a bootstrap error.
 
> @@ -2444,42 +2503,47 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rtx 
> sel)
>   index is in range of [0, nunits - 1]. A single vrgather instructions is
>   enough. Since we will use vrgatherei16.vv for variable-length vector,
>   it is never out of range and we don't need to modulo the index.  */
> -  if (!nunits.is_constant () || const_vec_all_in_range_p (sel, 0, nunits - 
> 1))
> +  if (nunits.is_constant () && const_vec_all_in_range_p (sel, 0, nunits - 1))
>  {
>emit_vlmax_gather_insn (target, op0, sel);
>return;
>  }
>  
>/* Check if the two values vectors are the same.  */
> -  if (rtx_equal_p (op0, op1) || const_vec_duplicate_p (sel))
> +  if (rtx_equal_p (op0, op1))
>  {
>/* Note: vec_perm indices are supposed to wrap when they go beyond the
>  size of the two value vectors, i.e. the upper bits of the indices
>  are effectively ignored.  RVV vrgather instead produces 0 for any
>  out-of-range indices, so we need to modulo all the vec_perm indices
>  to ensure they are all in range of [0, nunits - 1].  */
> -  rtx max_sel = gen_const_vector_dup (sel_mode, nunits - 1);
> -  rtx sel_mod = expand_simple_binop (sel_mode, AND, sel, max_sel, NULL, 
> 0,
> - OPTAB_DIRECT);
> -  emit_vlmax_gather_insn (target, op1, sel_mod);
> +  rtx sel_mod = modulo_sel_indices (sel, nunits - 1);
> +  emit_vlmax_gather_insn (target, op0, sel_mod);
>return;
>  }
 
When reading it I considered unifying both cases and have modulo_sel_indices
just do nothing when the constant already satisfies the range requirement.
Would that work?
 
> -  OPTAB_DIRECT);
> +  poly_uint64 value = rtx_to_poly_int64 (elt);
> +  rtx op = op0;
> +  if (maybe_gt (value, nunits - 1))
> + {
> +   sel = gen_const_vector_dup (sel_mode, value - nunits);
> +   op = op1;
> + }
> +  emit_vlmax_gather_insn (target, op, sel);
>  }
 
That's again a "modulo".   Could that also fit in modulo_sel_indices?
Your call in the end, it just feels like we do the same thing kind
of differently in several places but no strict preference here.
 
> +/* Return true if it is the strided load/store.  */
> +static bool
> +strided_load_store_p (rtx vec_offset, rtx *base, rtx *step)
> +{
> +  if (const_vec_series_p (vec_offset, base, step))
> +return true;
 
The vectorizer will never emit this but we wouldn't want a step
of 1 here, right?
 
> +
> +  /* For strided load/store, vectorizer always generates
> + VEC_SERIES_EXPR for vec_offset.  */
> +  tree expr = REG_EXPR (vec_offset);
> +  if (!expr || TREE_CODE (expr) != SSA_NAME)
> +return false;
> +
> +  /* Check if it is GIMPLE like: _88 = VEC_SERIES_EXPR <0, _87>;  */
> +  gimple *def_stmt = SSA_NAME_DEF_STMT (expr);
> +  if (!def_stmt || !is_gimple_assign (def_stmt)
> +  || gimple_assign_rhs_code (def_stmt) != VEC_SERIES_EXPR)
> +return false;
 
Interesting to query the gimple here.  As long as the
vectorizer doesn't do strided stores separately, I guess we can
live with that.
 
> +  rtx ptr, 

Re: [PATCH] gcc-ar: Handle response files properly [PR77576]

2023-07-07 Thread Costas Argyris via Gcc-patches
Problem: gcc-ar fails when a @file is passed to it:

$ cat rsp
--version
$ gcc-ar @rsp
/usr/bin/ar: invalid option -- '@'

This is because a dash '-' is prepended to the first
argument if it doesn't start with one, resulting in
the wrong call 'ar -@rsp'.

Fix: Expand argv to get rid of any @files and if any
expansions were made, pass everything through a
temporary response file.

$ gcc-ar @rsp
GNU ar (GNU Binutils for Debian) 2.35.2
...


PR gcc-ar/77576
* gcc/gcc-ar.cc (main): Expand argv and use
temporary response file to call ar if any
expansions were made.
---
 gcc/gcc-ar.cc | 47 +++
 1 file changed, 47 insertions(+)

diff --git a/gcc/gcc-ar.cc b/gcc/gcc-ar.cc
index 4e4c525927d..417c4913793 100644
--- a/gcc/gcc-ar.cc
+++ b/gcc/gcc-ar.cc
@@ -135,6 +135,10 @@ main (int ac, char **av)
   int k, status, err;
   const char *err_msg;
   const char **nargv;
+  char **old_argv;
+  const char *rsp_file = NULL;
+  const char *rsp_arg = NULL;
+  const char *rsp_argv[3];
   bool is_ar = !strcmp (PERSONALITY, "ar");
   int exit_code = FATAL_EXIT_CODE;
   int i;
@@ -209,6 +213,13 @@ main (int ac, char **av)
  }
 }

+  /* Expand any @files before modifying the command line
+ and use a temporary response file if there were any.  */
+  old_argv = av;
+  expandargv (&ac, &av);
+  if (av != old_argv)
+rsp_file = make_temp_file ("");
+
   /* Prepend - if necessary.  */
   if (is_ar && av[1] && av[1][0] != '-')
 av[1] = concat ("-", av[1], NULL);
@@ -225,6 +236,39 @@ main (int ac, char **av)
 nargv[j + k] = av[k];
   nargv[j + k] = NULL;

+  /* If @file was passed, put nargv into the temporary response
+ file and then change it to a single @FILE argument, where
+ FILE is the temporary filename.  */
+  if (rsp_file)
+{
+  FILE *f;
+  int status;
+  f = fopen (rsp_file, "w");
+  if (f == NULL)
+{
+  fprintf (stderr, "Cannot open temporary file %s\n", rsp_file);
+  exit (1);
+}
+  status = writeargv (
+  CONST_CAST2 (char * const *, const char **, nargv) + 1, f);
+  if (status)
+{
+  fprintf (stderr, "Cannot write to temporary file %s\n",
rsp_file);
+  exit (1);
+}
+  status = fclose (f);
+  if (EOF == status)
+{
+  fprintf (stderr, "Cannot close temporary file %s\n", rsp_file);
+  exit (1);
+}
+  rsp_arg = concat ("@", rsp_file, NULL);
+  rsp_argv[0] = nargv[0];
+  rsp_argv[1] = rsp_arg;
+  rsp_argv[2] = NULL;
+  nargv = rsp_argv;
+}
+
   /* Run utility */
   /* ??? the const is misplaced in pex_one's argv? */
   err_msg = pex_one (PEX_LAST|PEX_SEARCH,
@@ -249,5 +293,8 @@ main (int ac, char **av)
   else
 exit_code = SUCCESS_EXIT_CODE;

+  if (rsp_file)
+unlink (rsp_file);
+
   return exit_code;
 }
-- 
2.30.2


Re: Re: [PATCH V3] RISC-V: Support gather_load/scatter RVV auto-vectorization

2023-07-07 Thread juzhe.zh...@rivai.ai
>>When reading it I considered unifying both cases and have modulo_sel_indices
>>just do nothing when the constant already satisfies the range requirement.
>>Would that work?
I tried, but it turns out to cause execution failures.

Sorry, I can try to refine this code. Thanks.


juzhe.zh...@rivai.ai
 
From: juzhe.zh...@rivai.ai
Date: 2023-07-07 18:07
To: Robin Dapp; gcc-patches
CC: Robin Dapp; kito.cheng; Kito.cheng; palmer; palmer; jeffreyalaw
Subject: Re: Re: [PATCH V3] RISC-V: Support gather_load/scatter RVV 
auto-vectorization
>> It's a bit weird that the function now also emits an insn.  It's not
>> similar to the aarch64 variant anymore then, I suppose.  If so, please
>> remove the comment.
Ok.

>> elt_mode is unused after your patch.  Please remove it or we will have
>> a bootstrap error.
Ok

>>When reading it I considered unifying both cases and have modulo_sel_indices
>>just do nothing when the constant already satisfies the range requirement.
>>Would that work?
I tried, but it turns out to cause execution failures.


>>The vectorizer will never emit this but we wouldn't want a step
>>of 1 here, right?
No, you can take a look at strided_load-2.c

>>What's dummp?  dumb?  It looks like it's used for switching between
>>vlmax/nonvlmax so a different name might be advisable.
Sorry for the typo; it should be dummy_len, meaning length = VF.  That is the
trick I play in the middle-end to indicate that such vector operations are not
affected by the length.


>>The ops are similar, better to define them outside of the if/else.
>>I would also rather have both in the same emit helper but that was
>>discussed before.  The following similar patterns all look a bit
>>"boilerplate-ish".  Tolerable for now I guess.
Ok, will have a try.


>>This one I don't get.  Why do we still need to zero_extend when the
>>hardware does it for us?  Shouldn't we only sign extend when the
>>expander says so?  Actually we should even scan-assembler-not for
>>(v)zext?  Additionally maybe also scan-assembler (v)sext for the
>>respective cases.

For zero_extend with scale != 1, we need to first zero_extend and then multiply
by the scale.

>>Besides, couldn't we do a widening shift when combining it with
>>scale_log != 0?
Does RVV have a widening shift? I didn't know that. The current, natural
approach is to first extend the offset and then shift.

>>instead.  This avoid the (surprising) weird subreg being generated
>>at all and we don't need to ensure, probably redundantly, that the
>>const_poly value is in range etc.
Ok.

>>Maybe add a single-line comment as for the other existing cases.
OK.


juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-07-07 17:43
To: Juzhe-Zhong; gcc-patches
CC: rdapp.gcc; kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw
Subject: Re: [PATCH V3] RISC-V: Support gather_load/scatter RVV 
auto-vectorization
Hi Juzhe,
 
thanks, that's quite a chunk :) and it took me a while to
go through it.
 
> @@ -564,7 +565,14 @@ const_vec_all_in_range_p (rtx vec, poly_int64 minval, 
> poly_int64 maxval)
>  static rtx
>  gen_const_vector_dup (machine_mode mode, poly_int64 val)
>  {
> -  rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
> +  scalar_mode smode = GET_MODE_INNER (mode);
> +  rtx c = gen_int_mode (val, smode);
> +  if (!val.is_constant () && GET_MODE_SIZE (smode) > GET_MODE_SIZE (Pmode))
> +{
> +  rtx dup = gen_reg_rtx (mode);
> +  emit_insn (gen_vec_duplicate (mode, dup, c));
> +  return dup;
> +}
>return gen_const_vec_duplicate (mode, c);
>  }
 
It's a bit weird that the function now also emits an insn.  It's not
similar to the aarch64 variant anymore then, I suppose.  If so, please
remove the comment.
 
> +
>  /* This function emits a masked instruction.  */
>  void
>  emit_vlmax_masked_mu_insn (unsigned icode, int op_num, rtx *ops)
> @@ -1162,7 +1203,6 @@ expand_const_vector (rtx target, rtx src)
>  }
>else
>  {
> -   elt = force_reg (elt_mode, elt);
>rtx ops[] = {tmp, elt};
>emit_vlmax_insn (code_for_pred_broadcast (mode), RVV_UNOP, ops);
>  }
> @@ -2431,6 +2471,25 @@ expand_vec_cmp_float (rtx target, rtx_code code, rtx 
> op0, rtx op1,
>return false;
>  }
 
elt_mode is unused after your patch.  Please remove it or we will have
a bootstrap error.
 
> @@ -2444,42 +2503,47 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rtx 
> sel)
>   index is in range of [0, nunits - 1]. A single vrgather instructions is
>   enough. Since we will use vrgatherei16.vv for variable-length vector,
>   it is never out of range and we don't need to modulo the index.  */
> -  if (!nunits.is_constant () || const_vec_all_in_range_p (sel, 0, nunits - 
> 1))
> +  if (nunits.is_constant () && const_vec_all_in_range_p (sel, 0, nunits - 1))
>  {
>emit_vlmax_gather_insn (target, op0, sel);
>return;
>  }
>  
>/* Check if the two values vectors are the same.  */
> -  if (rtx_equal_p (op0, op1) || const_vec_duplicate_p (sel))
> +  if (rtx_equal_p (op0, op1))
>  {
>/* Note: vec_perm indices are supposed to 

Re: [PATCH] Vect: use a small step to calculate induction for the unrolled loop (PR tree-optimization/110449)

2023-07-07 Thread Richard Biener via Gcc-patches
On Fri, Jul 7, 2023 at 9:53 AM Richard Sandiford
 wrote:
>
> Richard Biener  writes:
> >> Am 06.07.2023 um 19:50 schrieb Richard Sandiford 
> >> :
> >>
> >> Richard Biener via Gcc-patches  writes:
>  On Wed, Jul 5, 2023 at 8:44 AM Hao Liu OS via Gcc-patches
>   wrote:
> 
>  Hi,
> 
>  If a loop is unrolled by n times during vectorization, two steps are used 
>  to
>  calculate the induction variable:
>   - The small step for the unrolled ith-copy: vec_1 = vec_iv + (VF/n * 
>  Step)
>   - The large step for the whole loop: vec_loop = vec_iv + (VF * Step)
> 
>  This patch calculates an extra vec_n to replace vec_loop:
>   vec_n = vec_prev + (VF/n * S) = vec_iv + (VF/n * S) * n = vec_loop.
> 
>  So that we can save the large step register and related operations.
> >>>
> >>> OK.  It would be nice to avoid the dead stmts created earlier though.
> >>
> >> FWIW, I still don't think we should do this.  Part of the point of
> >> unrolling is to shorten loop-carried dependencies, whereas this patch
> >> is going in the opposite direction.
> >
> > Note ncopies can be >1 without additional unrolling.
>
> Yeah, true.  But I think even there, avoiding a longer loop-carried
> dependency should be a good thing.
>
> > With non VLA vectors all of the updates will be constant folded btw.
>
> Are you sure?  The motivating example is an Advanced SIMD one,
> not a VLA one.  No variable-length vectors are involved.
>
> Maybe constant folding caps the dependency chain to length 2?
> But 2 is still more than 1. :)

The

  /* (A +- CST1) +- CST2 -> A + CST3
 Use view_convert because it is safe for vectors and equivalent for
 scalars.  */
  (for outer_op (plus minus)
   (for inner_op (plus minus)
neg_inner_op (minus plus)

pattern should apply here for example during forwprop.  It handles
vector constants just fine, so I wonder why it doesn't trigger.

Richard.

> Thanks,
> Richard
>
> >
> > Richard
> >
> >> Richard
> >>
> >>>
> >>> Thanks,
> >>> Richard.
> >>>
>  gcc/ChangeLog:
> 
> PR tree-optimization/110449
> * tree-vect-loop.cc (vectorizable_induction): use vec_n to replace
> vec_loop for the unrolled loop.
> 
>  gcc/testsuite/ChangeLog:
> 
> * gcc.target/aarch64/pr110449.c: New testcase.
>  ---
>  gcc/testsuite/gcc.target/aarch64/pr110449.c | 40 +
>  gcc/tree-vect-loop.cc   | 21 +--
>  2 files changed, 58 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/pr110449.c
> 
>  diff --git a/gcc/testsuite/gcc.target/aarch64/pr110449.c 
>  b/gcc/testsuite/gcc.target/aarch64/pr110449.c
>  new file mode 100644
>  index 000..bb3b6dcfe08
>  --- /dev/null
>  +++ b/gcc/testsuite/gcc.target/aarch64/pr110449.c
>  @@ -0,0 +1,40 @@
>  +/* { dg-do compile } */
>  +/* { dg-options "-Ofast -mcpu=neoverse-n2 --param 
>  aarch64-vect-unroll-limit=2" } */
>  +/* { dg-final { scan-assembler-not "8.0e\\+0" } } */
>  +
>  +/* Calculate the vectorized induction with smaller step for an unrolled 
>  loop.
>  +
>  +   before (suggested_unroll_factor=2):
>  + fmovs30, 8.0e+0
>  + fmovs31, 4.0e+0
>  + dup v27.4s, v30.s[0]
>  + dup v28.4s, v31.s[0]
>  + .L6:
>  + mov v30.16b, v31.16b
>  + faddv31.4s, v31.4s, v27.4s
>  + faddv29.4s, v30.4s, v28.4s
>  + stp q30, q29, [x0]
>  + add x0, x0, 32
>  + cmp x1, x0
>  + bne .L6
>  +
>  +   after:
>  + fmovs31, 4.0e+0
>  + dup v29.4s, v31.s[0]
>  + .L6:
>  + faddv30.4s, v31.4s, v29.4s
>  + stp q31, q30, [x0]
>  + add x0, x0, 32
>  + faddv31.4s, v29.4s, v30.4s
>  + cmp x0, x1
>  + bne .L6  */
>  +
>  +void
>  +foo2 (float *arr, float freq, float step)
>  +{
>  +  for (int i = 0; i < 1024; i++)
>  +{
>  +  arr[i] = freq;
>  +  freq += step;
>  +}
>  +}
>  diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
>  index 3b46c58a8d8..706ecbffd0c 100644
>  --- a/gcc/tree-vect-loop.cc
>  +++ b/gcc/tree-vect-loop.cc
>  @@ -10114,7 +10114,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
>    new_vec, step_vectype, NULL);
> 
>    vec_def = induc_def;
>  -  for (i = 1; i < ncopies; i++)
>  +  for (i = 1; i < ncopies + 1; i++)
> {
>   /* vec_i = vec_prev + vec_step  */
>   gimple_seq stmts = NULL;
>  @@ -10124,8 +10124,23 @@ vectorizable_induction (loop_vec_info 
>  loop_vinfo,
>   

Re: Re: [PATCH V3] RISC-V: Support gather_load/scatter RVV auto-vectorization

2023-07-07 Thread juzhe.zh...@rivai.ai
>> It's a bit weird that the function now also emits an insn.  It's not
>> similar to the aarch64 variant anymore then, I suppose.  If so, please
>> remove the comment.
Ok.

>> elt_mode is unused after your patch.  Please remove it or we will have
>> a bootstrap error.
Ok

>>When reading it I considered unifying both cases and have modulo_sel_indices
>>just do nothing when the constant already satisfies the range requirement.
>>Would that work?
I tried, but it turns out to cause execution failures.


>>The vectorizer will never emit this but we wouldn't want a step
>>of 1 here, right?
No, you can take a look at strided_load-2.c

>>What's dummp?  dumb?  It looks like it's used for switching between
>>vlmax/nonvlmax so a different name might be advisable.
Sorry for the typo; it should be dummy_len, meaning length = VF.  That is the
trick I play in the middle-end to indicate that such vector operations are not
affected by the length.


>>The ops are similar, better to define them outside of the if/else.
>>I would also rather have both in the same emit helper but that was
>>discussed before.  The following similar patterns all look a bit
>>"boilerplate-ish".  Tolerable for now I guess.
Ok, will have a try.


>>This one I don't get.  Why do we still need to zero_extend when the
>>hardware does it for us?  Shouldn't we only sign extend when the
>>expander says so?  Actually we should even scan-assembler-not for
>>(v)zext?  Additionally maybe also scan-assembler (v)sext for the
>>respective cases.

For zero_extend with scale != 1, we need to first zero_extend and then multiply
by the scale.

>>Besides, couldn't we do a widening shift when combining it with
>>scale_log != 0?
Does RVV have a widening shift? I didn't know that. The current, natural
approach is to first extend the offset and then shift.

>>instead.  This avoid the (surprising) weird subreg being generated
>>at all and we don't need to ensure, probably redundantly, that the
>>const_poly value is in range etc.
Ok.

>>Maybe add a single-line comment as for the other existing cases.
OK.


juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-07-07 17:43
To: Juzhe-Zhong; gcc-patches
CC: rdapp.gcc; kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw
Subject: Re: [PATCH V3] RISC-V: Support gather_load/scatter RVV 
auto-vectorization
Hi Juzhe,
 
thanks, that's quite a chunk :) and it took me a while to
go through it.
 
> @@ -564,7 +565,14 @@ const_vec_all_in_range_p (rtx vec, poly_int64 minval, 
> poly_int64 maxval)
>  static rtx
>  gen_const_vector_dup (machine_mode mode, poly_int64 val)
>  {
> -  rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
> +  scalar_mode smode = GET_MODE_INNER (mode);
> +  rtx c = gen_int_mode (val, smode);
> +  if (!val.is_constant () && GET_MODE_SIZE (smode) > GET_MODE_SIZE (Pmode))
> +{
> +  rtx dup = gen_reg_rtx (mode);
> +  emit_insn (gen_vec_duplicate (mode, dup, c));
> +  return dup;
> +}
>return gen_const_vec_duplicate (mode, c);
>  }
 
It's a bit weird that the function now also emits an insn.  It's not
similar to the aarch64 variant anymore then, I suppose.  If so, please
remove the comment.
 
> +
>  /* This function emits a masked instruction.  */
>  void
>  emit_vlmax_masked_mu_insn (unsigned icode, int op_num, rtx *ops)
> @@ -1162,7 +1203,6 @@ expand_const_vector (rtx target, rtx src)
>  }
>else
>  {
> -   elt = force_reg (elt_mode, elt);
>rtx ops[] = {tmp, elt};
>emit_vlmax_insn (code_for_pred_broadcast (mode), RVV_UNOP, ops);
>  }
> @@ -2431,6 +2471,25 @@ expand_vec_cmp_float (rtx target, rtx_code code, rtx 
> op0, rtx op1,
>return false;
>  }
 
elt_mode is unused after your patch.  Please remove it or we will have
a bootstrap error.
 
> @@ -2444,42 +2503,47 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rtx 
> sel)
>   index is in range of [0, nunits - 1]. A single vrgather instructions is
>   enough. Since we will use vrgatherei16.vv for variable-length vector,
>   it is never out of range and we don't need to modulo the index.  */
> -  if (!nunits.is_constant () || const_vec_all_in_range_p (sel, 0, nunits - 
> 1))
> +  if (nunits.is_constant () && const_vec_all_in_range_p (sel, 0, nunits - 1))
>  {
>emit_vlmax_gather_insn (target, op0, sel);
>return;
>  }
>  
>/* Check if the two values vectors are the same.  */
> -  if (rtx_equal_p (op0, op1) || const_vec_duplicate_p (sel))
> +  if (rtx_equal_p (op0, op1))
>  {
>/* Note: vec_perm indices are supposed to wrap when they go beyond the
>  size of the two value vectors, i.e. the upper bits of the indices
>  are effectively ignored.  RVV vrgather instead produces 0 for any
>  out-of-range indices, so we need to modulo all the vec_perm indices
>  to ensure they are all in range of [0, nunits - 1].  */
> -  rtx max_sel = gen_const_vector_dup (sel_mode, nunits - 1);
> -  rtx sel_mod = expand_simple_binop (sel_mode, AND, sel, max_sel, NULL, 
> 0,
> - OPTAB_DIRECT);
> -  emit_vlmax_gather_insn (target, op1, sel_mod);
> +  

Re: [PATCH] vect: Fix vectorized BIT_FIELD_REF for signed bit-fields [PR110557]

2023-07-07 Thread Xi Ruoyao via Gcc-patches
On Fri, 2023-07-07 at 08:15 +0200, Richard Biener wrote:

/* snip */

> > +  bool sign_ext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref)) &&
> > +  TYPE_PRECISION (ret_type) > mask_width);
> > +  bool widening = ((TYPE_PRECISION (TREE_TYPE (container)) <
> > +   TYPE_PRECISION (ret_type))
> > +  && !useless_type_conversion_p (TREE_TYPE (container),
> > + ret_type));
> 
> the !useless_type_conversion_p check isn't necessary, when TYPE_PRECISION
> isn't equal the conversion is never useless.

I'll drop it.

> I'll also note that ret_type == TREE_TYPE (bf_ref).

No, ret_type == TREE_TYPE (ret), not TREE_TYPE (bf_ref).  For something
like

struct Item
  {
int x : 30;
int y : 30;
  };

Item *p = get();
unsigned long t = p->y;

Then TREE_TYPE (ret) is unsigned long, and TREE_TYPE (bf_ref) is int. 
In this case we still need to perform the sign extension: if p->y is -1
we should have -1ul in t.  So we need to check the signedness of
TREE_TYPE (bf_ref).

> Can you rename 'widening' to 'load_widen' and 'sign_ext' to 'ref_sext'?  As 
> they
> are named it suggest they apply to the same so I originally thought sign_ext
> should be widening && !TYPE_UNSIGNED.

I'll rename them.

I'll send a v2 after testing it.
> 

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


Re: [PATCH V3] RISC-V: Support gather_load/scatter RVV auto-vectorization

2023-07-07 Thread Robin Dapp via Gcc-patches
Hi Juzhe,

thanks, that's quite a chunk :) and it took me a while to
go through it.

> @@ -564,7 +565,14 @@ const_vec_all_in_range_p (rtx vec, poly_int64 minval, 
> poly_int64 maxval)
>  static rtx
>  gen_const_vector_dup (machine_mode mode, poly_int64 val)
>  {
> -  rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
> +  scalar_mode smode = GET_MODE_INNER (mode);
> +  rtx c = gen_int_mode (val, smode);
> +  if (!val.is_constant () && GET_MODE_SIZE (smode) > GET_MODE_SIZE (Pmode))
> +{
> +  rtx dup = gen_reg_rtx (mode);
> +  emit_insn (gen_vec_duplicate (mode, dup, c));
> +  return dup;
> +}
>return gen_const_vec_duplicate (mode, c);
>  }

It's a bit weird that the function now also emits an insn.  It's not
similar to the aarch64 variant anymore then, I suppose.  If so, please
remove the comment.

> +
>  /* This function emits a masked instruction.  */
>  void
>  emit_vlmax_masked_mu_insn (unsigned icode, int op_num, rtx *ops)
> @@ -1162,7 +1203,6 @@ expand_const_vector (rtx target, rtx src)
>   }
>else
>   {
> -   elt = force_reg (elt_mode, elt);
> rtx ops[] = {tmp, elt};
> emit_vlmax_insn (code_for_pred_broadcast (mode), RVV_UNOP, ops);
>   }
> @@ -2431,6 +2471,25 @@ expand_vec_cmp_float (rtx target, rtx_code code, rtx 
> op0, rtx op1,
>return false;
>  }

elt_mode is unused after your patch.  Please remove it or we will have
a bootstrap error.

> @@ -2444,42 +2503,47 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rtx 
> sel)
>   index is in range of [0, nunits - 1]. A single vrgather instructions is
>   enough. Since we will use vrgatherei16.vv for variable-length vector,
>   it is never out of range and we don't need to modulo the index.  */
> -  if (!nunits.is_constant () || const_vec_all_in_range_p (sel, 0, nunits - 
> 1))
> +  if (nunits.is_constant () && const_vec_all_in_range_p (sel, 0, nunits - 1))
>  {
>emit_vlmax_gather_insn (target, op0, sel);
>return;
>  }
>  
>/* Check if the two values vectors are the same.  */
> -  if (rtx_equal_p (op0, op1) || const_vec_duplicate_p (sel))
> +  if (rtx_equal_p (op0, op1))
>  {
>/* Note: vec_perm indices are supposed to wrap when they go beyond the
>size of the two value vectors, i.e. the upper bits of the indices
>are effectively ignored.  RVV vrgather instead produces 0 for any
>out-of-range indices, so we need to modulo all the vec_perm indices
>to ensure they are all in range of [0, nunits - 1].  */
> -  rtx max_sel = gen_const_vector_dup (sel_mode, nunits - 1);
> -  rtx sel_mod = expand_simple_binop (sel_mode, AND, sel, max_sel, NULL, 
> 0,
> -  OPTAB_DIRECT);
> -  emit_vlmax_gather_insn (target, op1, sel_mod);
> +  rtx sel_mod = modulo_sel_indices (sel, nunits - 1);
> +  emit_vlmax_gather_insn (target, op0, sel_mod);
>return;
>  }

When reading it I considered unifying both cases and have modulo_sel_indices
just do nothing when the constant already satisfies the range requirement.
Would that work?

> -  OPTAB_DIRECT);
> +  poly_uint64 value = rtx_to_poly_int64 (elt);
> +  rtx op = op0;
> +  if (maybe_gt (value, nunits - 1))
> + {
> +   sel = gen_const_vector_dup (sel_mode, value - nunits);
> +   op = op1;
> + }
> +  emit_vlmax_gather_insn (target, op, sel);
>  }

That's again a "modulo".   Could that also fit in modulo_sel_indices?
Your call in the end, it just feels like we do the same thing kind
of differently in several places but no strict preference here.

> +/* Return true if it is the strided load/store.  */
> +static bool
> +strided_load_store_p (rtx vec_offset, rtx *base, rtx *step)
> +{
> +  if (const_vec_series_p (vec_offset, base, step))
> +return true;

The vectorizer will never emit this but we wouldn't want a step
of 1 here, right?

> +
> +  /* For strided load/store, vectorizer always generates
> + VEC_SERIES_EXPR for vec_offset.  */
> +  tree expr = REG_EXPR (vec_offset);
> +  if (!expr || TREE_CODE (expr) != SSA_NAME)
> +return false;
> +
> +  /* Check if it is GIMPLE like: _88 = VEC_SERIES_EXPR <0, _87>;  */
> +  gimple *def_stmt = SSA_NAME_DEF_STMT (expr);
> +  if (!def_stmt || !is_gimple_assign (def_stmt)
> +  || gimple_assign_rhs_code (def_stmt) != VEC_SERIES_EXPR)
> +return false;

Interesting to query the gimple here.  As long as the
vectorizer doesn't do strided stores separately, I guess we can
live with that.

> +  rtx ptr, vec_offset, vec_reg, len, mask;
> +  bool zero_extend_p;
> +  int scale_log2;
> +  if (is_load)
> +{
> +  vec_reg = ops[0];
> +  ptr = ops[1];
> +  vec_offset = ops[2];
> +  zero_extend_p = INTVAL (ops[3]);
> +  scale_log2 = exact_log2 (INTVAL (ops[4]));
> +  len = ops[5];
> +  mask = ops[7];
> +}
> +  else
> +{
> +  vec_reg = ops[4];
> +  ptr = 

[PATCH 2/2]middle-end ifcvt: Sort PHI arguments not only occurrences but also complexity [PR109154]

2023-07-07 Thread Tamar Christina via Gcc-patches
Hi All,

This patch builds on the previous patch by fixing another issue with the
way ifcvt currently picks which branches to test.

The issue with the current implementation is that, while it sorts by the number
of occurrences of each argument, it doesn't check the complexity of the
arguments.

As an example:

  <bb 15> [local count: 528603100]:
  ...
  if (distbb_75 >= 0.0)
    goto <bb 17>; [59.00%]
  else
    goto <bb 16>; [41.00%]

  <bb 16> [local count: 216727269]:
  ...
  goto <bb 19>; [100.00%]

  <bb 17> [local count: 311875831]:
  ...
  if (distbb_75 < iftmp.0_98)
    goto <bb 18>; [20.00%]
  else
    goto <bb 19>; [80.00%]

  <bb 18> [local count: 62375167]:
  ...

  <bb 19> [local count: 528603100]:
  # prephitmp_175 = PHI <_173(18), 0.0(17), _174(16)>

All three arguments to the PHI have the same number of occurrences, namely 1,
however it makes a big difference which comparison we test first.

Sorting only on occurrences we'll pick the compares coming from BB 18 and BB 17.
This means we end up generating 4 comparisons, while 2 would have been enough.

By keeping track of the "complexity" of the COND in each BB (i.e. the number
of comparisons needed to traverse from the start [BB 15] to end [BB 19]) and
using a key tuple of <occurrences, complexity> we end up selecting the compare
from BB 16 and BB 18 first.  BB 16 only requires 1 compare, and BB 18, after we
test BB 16, also only requires one additional compare.  This change paired with
the previous patch results in the optimal 2 compares.

For deep nesting, i.e. for

...
  _79 = vr_15 > 20;
  _80 = _68 & _79;
  _82 = vr_15 <= 20;
  _83 = _68 & _82;
  _84 = vr_15 < -20;
  _85 = _73 & _84;
  _87 = vr_15 >= -20;
  _88 = _73 & _87;
  _ifc__111 = _55 ? 10 : 12;
  _ifc__112 = _70 ? 7 : _ifc__111;
  _ifc__113 = _85 ? 8 : _ifc__112;
  _ifc__114 = _88 ? 9 : _ifc__113;
  _ifc__115 = _45 ? 1 : _ifc__114;
  _ifc__116 = _63 ? 3 : _ifc__115;
  _ifc__117 = _65 ? 4 : _ifc__116;
  _ifc__118 = _83 ? 6 : _ifc__117;
  _ifc__119 = _60 ? 2 : _ifc__118;
  _ifc__120 = _43 ? 13 : _ifc__119;
  _ifc__121 = _75 ? 11 : _ifc__120;
  vw_1 = _80 ? 5 : _ifc__121;

Most of the comparisons are still needed because the chain of
occurrences do not negate each other, i.e. _88 is _73 & vr_15 >= -20 and
_85 is _73 & vr_15 < -20.  Clearly, given _73 needs to be true in both branches,
the only additional test needed is on vr_15, where the one test is the negation
of the other.  So we don't need to do the comparison of _73 twice.

The changes in the patch reduce the overall number of compares by one, but have
a bigger effect on the dependency chain.

Previously we would generate a chain of 5 instructions:

cmple   p7.s, p4/z, z29.s, z30.s
cmpne   p7.s, p7/z, z29.s, #0
cmple   p6.s, p7/z, z31.s, z30.s
cmpge   p6.s, p6/z, z27.s, z25.s
cmplt   p15.s, p6/z, z28.s, z21.s

as the longest chain.  With this patch we generate 3:

cmple   p7.s, p3/z, z27.s, z30.s
cmpne   p7.s, p7/z, z27.s, #0
cmpgt   p7.s, p7/z, z31.s, z30.s

and I don't think (x <= y) && (x != 0) && (z > y) can be reduced further.

Bootstrapped and Regtested on aarch64-none-linux-gnu and no issues.

Not sure how to write a non-fragile testcase for this as the
conditionals chosen depends on threading etc. Any Suggestions?

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

PR tree-optimization/109154
* tree-if-conv.cc (INCLUDE_ALGORITHM): Include.
(struct bb_predicate): Add no_predicate_stmts.
(set_bb_predicate): Increase predicate count.
(set_bb_predicate_gimplified_stmts): Conditionally initialize
no_predicate_stmts.
(get_bb_num_predicate_stmts): New.
(init_bb_predicate): Initialize no_predicate_stmts.
(release_bb_predicate): Cleanup no_predicate_stmts.
(insert_gimplified_predicates): Preserve no_predicate_stmts.

--- inline copy of patch -- 
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 
16b36dd8b0226f796c1a3fc6d45a9059385e812b..0ed50d99c46f99a4d1ea0e827ee2b2a3f494b2da
 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -80,6 +80,7 @@ along with GCC; see the file COPYING3.  If not see
  :;
 */
 
+#define INCLUDE_ALGORITHM
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
@@ -231,6 +232,10 @@ struct bb_predicate {
  recorded here, in order to avoid the duplication of computations
  that occur in previous conditions.  See PR44483.  */
   gimple_seq predicate_gimplified_stmts;
+
+  /* Records the number of statements recorded into
+ PREDICATE_GIMPLIFIED_STMTS.   */
+  unsigned no_predicate_stmts;
 };
 
 /* Returns true when the basic block BB has a predicate.  */
@@ -254,10 +259,16 @@ bb_predicate (basic_block bb)
 static inline void
 set_bb_predicate (basic_block bb, tree cond)
 {
+  auto aux = (struct bb_predicate *) bb->aux;
   gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
   && is_gimple_val (TREE_OPERAND (cond, 0)))
  || is_gimple_val (cond));
-  ((struct bb_predicate *) bb->aux)->predicate = cond;
+  aux->predicate 

[PATCH 1/2]middle-end ifcvt: Reduce comparisons on conditionals by tracking truths [PR109154]

2023-07-07 Thread Tamar Christina via Gcc-patches
Hi All,

Following on from Jakub's patch in g:de0ee9d14165eebb3d31c84e98260c05c3b33acb
these two patches finish the work of fixing the regression and improve codegen.

As explained in that commit, ifconvert sorts PHI args in increasing number of
occurrences in order to reduce the number of comparisons done while
traversing the tree.

The remaining task that this patch fixes is dealing with the long chain of
comparisons that can be created from phi nodes, particularly when they share
any common successor (classical example is a diamond node).

On a PHI node the true and false branches each carry a condition: true will
carry `a` and false `~a`.  The issue is that at the moment GCC tests both `a`
and `~a` when the PHI node has more than 2 arguments.  Clearly this isn't
needed.  The deeper the nesting of PHI nodes the larger the repetition.

As an example, for

foo (int *f, int d, int e)
{
  for (int i = 0; i < 1024; i++)
{
  int a = f[i];
  int t;
  if (a < 0)
t = 1;
  else if (a < e)
t = 1 - a * d;
  else
t = 0;
  f[i] = t;
}
}

after Jakub's patch we generate:

  _7 = a_10 < 0;
  _21 = a_10 >= 0;
  _22 = a_10 < e_11(D);
  _23 = _21 & _22;
  _ifc__42 = _23 ? t_13 : 0;
  t_6 = _7 ? 1 : _ifc__42

but while better than before it is still inefficient, since in the false
branch, where we know ~_7 is true, we still test _21.

This leads to superfluous tests for every diamond node.  After this patch we
generate

 _7 = a_10 < 0;
 _22 = a_10 < e_11(D);
 _ifc__42 = _22 ? t_13 : 0;
 t_6 = _7 ? 1 : _ifc__42;

Which correctly elides the test of _21.  This is done by borrowing the
vectorizer's helper functions to limit predicate mask usages.  Ifcvt will chain
conditionals on the false edge (unless specifically inverted) so this patch on
creating cond a ? b : c, will register ~a when traversing c.  If c is a
conditional then c will be simplified to the smaller possible predicate given
the assumptions we already know to be true.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Not sure how to write a non-fragile testcase for this as the
conditionals chosen depends on threading etc. Any Suggestions?

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

PR tree-optimization/109154
* tree-if-conv.cc (gen_simplified_condition,
gen_phi_nest_statement): New.
(gen_phi_arg_condition, predicate_scalar_phi): Use it.

--- inline copy of patch -- 
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 
e342532a343a3c066142adeec5fdfaf736a653e5..16b36dd8b0226f796c1a3fc6d45a9059385e812b
 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -1870,12 +1870,44 @@ convert_scalar_cond_reduction (gimple *reduc, 
gimple_stmt_iterator *gsi,
   return rhs;
 }
 
+/* Generate a simplified conditional.  */
+
+static tree
+gen_simplified_condition (tree cond, scalar_cond_masked_set_type &cond_set)
+{
+  /* Check if the value is already live in a previous branch.  This resolves
+ nested conditionals from diamond PHI reductions.  */
+  if (TREE_CODE (cond) == SSA_NAME)
+{
+  gimple *stmt = SSA_NAME_DEF_STMT (cond);
+  gassign *assign = NULL;
+  if ((assign = as_a <gassign *> (stmt))
+  && gimple_assign_rhs_code (assign) == BIT_AND_EXPR)
+   {
+ tree arg1 = gimple_assign_rhs1 (assign);
+ tree arg2 = gimple_assign_rhs2 (assign);
+ if (cond_set.contains ({ arg1, 1 }))
+   arg1 = boolean_true_node;
+ else
+   arg1 = gen_simplified_condition (arg1, cond_set);
+
+ if (cond_set.contains ({ arg2, 1 }))
+   arg2 = boolean_true_node;
+ else
+   arg2 = gen_simplified_condition (arg2, cond_set);
+
+ cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, arg1, arg2);
+   }
+}
+  return cond;
+}
+
 /* Produce condition for all occurrences of ARG in PHI node.  Set *INVERT
as to whether the condition is inverted.  */
 
 static tree
-gen_phi_arg_condition (gphi *phi, vec<int> *occur,
-  gimple_stmt_iterator *gsi, bool *invert)
+gen_phi_arg_condition (gphi *phi, vec<int> *occur, gimple_stmt_iterator *gsi,
+  scalar_cond_masked_set_type &cond_set, bool *invert)
 {
   int len;
   int i;
@@ -1902,6 +1934,8 @@ gen_phi_arg_condition (gphi *phi, vec *occur,
  c = TREE_OPERAND (c, 0);
  *invert = true;
}
+
+  c = gen_simplified_condition (c, cond_set);
   c = force_gimple_operand_gsi (gsi, unshare_expr (c),
true, NULL_TREE, true, GSI_SAME_STMT);
   if (cond != NULL_TREE)
@@ -1913,11 +1947,79 @@ gen_phi_arg_condition (gphi *phi, vec *occur,
}
   else
cond = c;
+
+  /* Register the new possibly simplified conditional.  When more than 2
+entries in a phi node we chain entries in the false branch, so the
+inverted condition is active.  */
+  scalar_cond_masked_key pred_cond ({ cond, 1 });
+  if (!invert)
+   

RE: Re: [PATCH] RISCV: Fix local_eliminate_vsetvl_insn bug in VSETVL PASS[PR110560]

2023-07-07 Thread Li, Pan2 via Gcc-patches
Committed to gcc-13, thanks Juzhe and Kito.

Pan

From: juzhe.zh...@rivai.ai 
Sent: Friday, July 7, 2023 5:07 PM
To: kito.cheng 
Cc: Li Xu ; gcc-patches ; 
palmer ; zhengyu ; Li, Pan2 

Subject: Re: Re: [PATCH] RISCV: Fix local_eliminate_vsetvl_insn bug in VSETVL 
PASS[PR110560]

CCing Li Pan to backport this patch.

Thanks.


juzhe.zh...@rivai.ai

From: Kito Cheng
Date: 2023-07-07 17:04
To: juzhe.zh...@rivai.ai
CC: Li Xu; 
gcc-patches; palmer; 
zhengyu
Subject: Re: [PATCH] RISCV: Fix local_eliminate_vsetvl_insn bug in VSETVL 
PASS[PR110560]
LGTM

On Fri, Jul 7, 2023 at 4:26 PM juzhe.zh...@rivai.ai
<mailto:juzhe.zh...@rivai.ai> wrote:
>
> LGTM. Thanks.
>
>
>
> juzhe.zh...@rivai.ai
>
> From: Li Xu
> Date: 2023-07-07 16:22
> To: gcc-patches
> CC: kito.cheng; palmer; juzhe.zhong; zhengyu; Li Xu
> Subject: [PATCH] RISCV: Fix local_eliminate_vsetvl_insn bug in VSETVL 
> PASS[PR110560]
> This patch fixes this issue happens on GCC-13.
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110560
>
> This patch should be backported to GCC-13.
> GCC-14 has rewritten this function, so there is no issue.
>
> PR target/110560
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vsetvl.cc (local_eliminate_vsetvl_insn): Fix bug.
> ---
> gcc/config/riscv/riscv-vsetvl.cc | 4 
> 1 file changed, 4 insertions(+)
>
> diff --git a/gcc/config/riscv/riscv-vsetvl.cc 
> b/gcc/config/riscv/riscv-vsetvl.cc
> index 35403b5679c..3355ca4e3fb 100644
> --- a/gcc/config/riscv/riscv-vsetvl.cc
> +++ b/gcc/config/riscv/riscv-vsetvl.cc
> @@ -1078,6 +1078,10 @@ local_eliminate_vsetvl_insn (const vector_insn_info 
> )
>   if (has_vtype_op (i->rtl ()))
> {
> +   if (!PREV_INSN (i->rtl ()))
> + return;
> +   if (!NONJUMP_INSN_P (PREV_INSN (i->rtl (
> + return;
>   if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl (
> return;
>   rtx avl = get_avl (i->rtl ());
> --
> 2.17.1
>
>



Re: Re: [PATCH] RISCV: Fix local_eliminate_vsetvl_insn bug in VSETVL PASS[PR110560]

2023-07-07 Thread juzhe.zh...@rivai.ai
CCing Li Pan to backport this patch.

Thanks.



juzhe.zh...@rivai.ai
 
From: Kito Cheng
Date: 2023-07-07 17:04
To: juzhe.zh...@rivai.ai
CC: Li Xu; gcc-patches; palmer; zhengyu
Subject: Re: [PATCH] RISCV: Fix local_eliminate_vsetvl_insn bug in VSETVL 
PASS[PR110560]
LGTM
 
On Fri, Jul 7, 2023 at 4:26 PM juzhe.zh...@rivai.ai
 wrote:
>
> LGTM. Thanks.
>
>
>
> juzhe.zh...@rivai.ai
>
> From: Li Xu
> Date: 2023-07-07 16:22
> To: gcc-patches
> CC: kito.cheng; palmer; juzhe.zhong; zhengyu; Li Xu
> Subject: [PATCH] RISCV: Fix local_eliminate_vsetvl_insn bug in VSETVL 
> PASS[PR110560]
> This patch fixes this issue happens on GCC-13.
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110560
>
> This patch should be backported to GCC-13.
> GCC-14 has rewritten this function, so there is no issue.
>
> PR target/110560
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vsetvl.cc (local_eliminate_vsetvl_insn): Fix bug.
> ---
> gcc/config/riscv/riscv-vsetvl.cc | 4 
> 1 file changed, 4 insertions(+)
>
> diff --git a/gcc/config/riscv/riscv-vsetvl.cc 
> b/gcc/config/riscv/riscv-vsetvl.cc
> index 35403b5679c..3355ca4e3fb 100644
> --- a/gcc/config/riscv/riscv-vsetvl.cc
> +++ b/gcc/config/riscv/riscv-vsetvl.cc
> @@ -1078,6 +1078,10 @@ local_eliminate_vsetvl_insn (const vector_insn_info 
> )
>   if (has_vtype_op (i->rtl ()))
> {
> +   if (!PREV_INSN (i->rtl ()))
> + return;
> +   if (!NONJUMP_INSN_P (PREV_INSN (i->rtl (
> + return;
>   if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl (
> return;
>   rtx avl = get_avl (i->rtl ());
> --
> 2.17.1
>
>
 


Re: [PATCH] RISCV: Fix local_eliminate_vsetvl_insn bug in VSETVL PASS[PR110560]

2023-07-07 Thread Kito Cheng via Gcc-patches
LGTM

On Fri, Jul 7, 2023 at 4:26 PM juzhe.zh...@rivai.ai
 wrote:
>
> LGTM. Thanks.
>
>
>
> juzhe.zh...@rivai.ai
>
> From: Li Xu
> Date: 2023-07-07 16:22
> To: gcc-patches
> CC: kito.cheng; palmer; juzhe.zhong; zhengyu; Li Xu
> Subject: [PATCH] RISCV: Fix local_eliminate_vsetvl_insn bug in VSETVL 
> PASS[PR110560]
> This patch fixes this issue happens on GCC-13.
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110560
>
> This patch should be backported to GCC-13.
> GCC-14 has rewritten this function, so there is no issue.
>
> PR target/110560
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vsetvl.cc (local_eliminate_vsetvl_insn): Fix bug.
> ---
> gcc/config/riscv/riscv-vsetvl.cc | 4 
> 1 file changed, 4 insertions(+)
>
> diff --git a/gcc/config/riscv/riscv-vsetvl.cc 
> b/gcc/config/riscv/riscv-vsetvl.cc
> index 35403b5679c..3355ca4e3fb 100644
> --- a/gcc/config/riscv/riscv-vsetvl.cc
> +++ b/gcc/config/riscv/riscv-vsetvl.cc
> @@ -1078,6 +1078,10 @@ local_eliminate_vsetvl_insn (const vector_insn_info 
> )
>   if (has_vtype_op (i->rtl ()))
> {
> +   if (!PREV_INSN (i->rtl ()))
> + return;
> +   if (!NONJUMP_INSN_P (PREV_INSN (i->rtl (
> + return;
>   if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl (
> return;
>   rtx avl = get_avl (i->rtl ());
> --
> 2.17.1
>
>


Re: [COMMITTED] ada: Fix expanding container aggregates

2023-07-07 Thread Eric Botcazou via Gcc-patches
> Ensure that container aggregate expressions are expanded as
> such and not as records even if the type of the expression is a
> record.
> 
> gcc/ada/
> 
>   * exp_aggr.adb (Expand_N_Aggregate): Ensure that container
>   aggregate expressions do not get expanded as records but instead
>   as container aggregates.

This is not a regression but the problem is quite visible in Ada 2022 so I 
backported the fix onto the 13 branch.

-- 
Eric Botcazou





Re: [COMMITTED] ada: Fix internal error on aggregate within container aggregate

2023-07-07 Thread Eric Botcazou via Gcc-patches
> This just applies the same fix to Expand_Array_Aggregate as the one that was
> recently applied to Convert_To_Assignments.
> 
> gcc/ada/
> 
>   * exp_aggr.adb (Convert_To_Assignments): Tweak comment.
>   (Expand_Array_Aggregate): Do not delay the expansion if the parent
>   node is a container aggregate.

This is not a regression but the problem is quite visible in Ada 2022 so I 
backported the fix onto the 13 branch.

-- 
Eric Botcazou




Re: [COMMITTED] ada: Fix crash on vector initialization

2023-07-07 Thread Eric Botcazou via Gcc-patches
> Such assignments to container aggregates are later transformed into
> procedure calls to the procedures named in the Aggregate aspect
> definition, for which the delayed expansion is not required/expected.
> 
> gcc/ada/
> 
>   * exp_aggr.adb (Convert_To_Assignments): Do not mark node for
>   delayed expansion if parent type has the Aggregate aspect.
>   * sem_util.adb (Is_Container_Aggregate): Move...
>   * sem_util.ads (Is_Container_Aggregate): ... here and make it
>   public.

This is not a regression but the problem is quite visible in Ada 2022 so I 
backported the fix onto the 13 branch.

-- 
Eric Botcazou




Re: [PATCH v4 4/9] MIPS: Add bitwise instructions for mips16e2

2023-07-07 Thread 梅杰
Hi,

在 2023/7/7 07:06, Jan-Benedict Glaw 写道:
> Hi!
> 
> On Mon, 2023-06-19 16:29:53 +0800, Jie Mei  wrote:
>> There are shortened bitwise instructions in the mips16e2 ASE,
>> for instance, ANDI, ORI/XORI, EXT, INS etc. .
>>
>> This patch adds these instructions with corresponding tests.
> 
> [...]
> 
> Starting with this patch, I see some new warning:
> 
> [all 2023-07-06 23:04:01] g++ -c   -g -O2   -DIN_GCC 
> -DCROSS_DIRECTORY_STRUCTURE   -fno-exceptions -fno-rtti 
> -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings 
> -Wcast-qual -Wmissing-format-attribute -Wconditionally-supported 
> -Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros 
> -Wno-overlength-strings -fno-common  -DHAVE_CONFIG_H  -DGENERATOR_FILE -I. 
> -Ibuild -I../../gcc/gcc -I../../gcc/gcc/build -I../../gcc/gcc/../include  
> -I../../gcc/gcc/../libcpp/include  \
> [all 2023-07-06 23:04:01]  -o build/gencondmd.o build/gencondmd.cc
> [all 2023-07-06 23:04:02] ../../gcc/gcc/config/mips/mips-msa.md:435:26: 
> warning: 'and' of mutually exclusive equal-tests is always 0
> [all 2023-07-06 23:04:02]   435 |   DONE;
> [all 2023-07-06 23:04:02] ../../gcc/gcc/config/mips/mips-msa.md:435:26: 
> warning: 'and' of mutually exclusive equal-tests is always 0
> [all 2023-07-06 23:04:03] ../../gcc/gcc/config/mips/mips.md:822:1: warning: 
> 'and' of mutually exclusive equal-tests is always 0
> [all 2023-07-06 23:04:03]   822 | ;; conditional-move-type condition is 
> needed.
> [all 2023-07-06 23:04:03]   | ^
> [all 2023-07-06 23:04:03] g++   -g -O2   -DIN_GCC -DCROSS_DIRECTORY_STRUCTURE 
>   -fno-exceptions -fno-rtti -fasynchronous-unwind-tables -W -Wall 
> -Wno-narrowing -Wwrite-strings -Wcast-qual -Wmissing-format-attribute 
> -Wconditionally-supported -Woverloaded-virtual -pedantic -Wno-long-long 
> -Wno-variadic-macros -Wno-overlength-strings -fno-common  -DHAVE_CONFIG_H  
> -DGENERATOR_FILE -static-libstdc++ -static-libgcc  -o build/gencondmd \
> [all 2023-07-06 23:04:03] build/gencondmd.o build/errors.o 
> ../build-x86_64-pc-linux-gnu/libiberty/libiberty.a
> [all 2023-07-06 23:04:03] build/gencondmd > tmp-cond.md
> 
> 
> (Full build log available as eg. 
> http://toolchain.lug-owl.de/laminar/jobs/gcc-mips-linux/76)
> 
> Thanks, JBG
> 

The warning you mentioned above seems gone if I change the condition 
`ISA_HAS_MIPS16E2` to `TARGET_MIPS16 && ISA_HAS_MIPS16E2` in mips.md.

But it's weird because `ISA_HAS_MIPS16E2` actually contains `TARGET_MIPS16`.

diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index b9eb541cf4a..77165778067 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -3436,7 +3436,7 @@
   [(set (match_operand:GPR 0 "register_operand" "=d,d")
(ior:GPR (match_operand:GPR 1 "register_operand" "%0,0")
 (match_operand:GPR 2 "uns_arith_operand" "d,K")))]
-  "ISA_HAS_MIPS16E2"
+  "TARGET_MIPS16 && ISA_HAS_MIPS16E2"
   "@
or\t%0,%2
ori\t%0,%x2"


Thanks,
Jie.


[PING][PATCH] tree-optimization/110279- Check for nested FMA chains in reassoc

2023-07-07 Thread Di Zhao OS via Gcc-patches
Update the patch so it can apply.

Tested on spec2017 fprate cases again. With option "-funroll-loops -Ofast 
-flto",
the improvements of 1-copy run are:

Ampere1:
508.namd_r  4.26% 
510.parest_r2.55%
Overall 0.54%
Intel Xeon:
503.bwaves_r1.3%
508.namd_r  1.58%
overall 0.42%


Thanks,
Di Zhao


> -Original Message-
> From: Di Zhao OS
> Sent: Friday, June 16, 2023 4:51 PM
> To: gcc-patches@gcc.gnu.org
> Subject: [PATCH] tree-optimization/110279- Check for nested FMA chains in
> reassoc
> 
> This patch is to fix the regressions found in SPEC2017 fprate cases
>  on aarch64.
> 
> 1. Reused code in pass widening_mul to check for nested FMA chains
>  (those connected by MULT_EXPRs), since re-writing to parallel
>  generates worse codes.
> 
> 2. Avoid re-arrange to produce less FMA chains that can be slow.
> 
> Tested on ampere1 and neoverse-n1, this fixed the regressions in
> 508.namd_r and 510.parest_r 1 copy run. While I'm still collecting data
> on x86 machines we have, I'd like to know what do you think of this.
> 
> (Previously I tried to improve things with FMA by adding a widening_mul
> pass before reassoc2 for it's easier to recognize different patterns
> of FMA chains and decide whether to split them. But I suppose handling
> them all in reassoc pass is more efficient.)
> 
> Thanks,
> Di Zhao
> 
> ---
> gcc/ChangeLog:
> 
> * tree-ssa-math-opts.cc (convert_mult_to_fma_1): Add new parameter.
> Support new mode that merely do the checking.
> (struct fma_transformation_info): Moved to header.
> (class fma_deferring_state): Moved to header.
> (convert_mult_to_fma): Add new parameter.
> * tree-ssa-math-opts.h (struct fma_transformation_info):
> (class fma_deferring_state): Moved from .cc.
> (convert_mult_to_fma): Add function decl.
> * tree-ssa-reassoc.cc (rewrite_expr_tree_parallel):
> (rank_ops_for_fma): Return -1 if nested FMAs are found.
> (reassociate_bb): Avoid rewriting to parallel if nested FMAs are
> found.



0001-Check-for-nested-FMA-chains-in-reassoc.patch
Description: 0001-Check-for-nested-FMA-chains-in-reassoc.patch


Re: [PATCH] RISCV: Fix local_eliminate_vsetvl_insn bug in VSETVL PASS[PR110560]

2023-07-07 Thread juzhe.zh...@rivai.ai
LGTM. Thanks.



juzhe.zh...@rivai.ai
 
From: Li Xu
Date: 2023-07-07 16:22
To: gcc-patches
CC: kito.cheng; palmer; juzhe.zhong; zhengyu; Li Xu
Subject: [PATCH] RISCV: Fix local_eliminate_vsetvl_insn bug in VSETVL 
PASS[PR110560]
This patch fixes this issue happens on GCC-13.
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110560
 
This patch should be backported to GCC-13.
GCC-14 has rewritten this function, so there is no issue.
 
PR target/110560
 
gcc/ChangeLog:
 
* config/riscv/riscv-vsetvl.cc (local_eliminate_vsetvl_insn): Fix bug.
---
gcc/config/riscv/riscv-vsetvl.cc | 4 
1 file changed, 4 insertions(+)
 
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 35403b5679c..3355ca4e3fb 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1078,6 +1078,10 @@ local_eliminate_vsetvl_insn (const vector_insn_info )
  if (has_vtype_op (i->rtl ()))
{
+   if (!PREV_INSN (i->rtl ()))
+ return;
+   if (!NONJUMP_INSN_P (PREV_INSN (i->rtl (
+ return;
  if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl (
return;
  rtx avl = get_avl (i->rtl ());
-- 
2.17.1
 
 


[PATCH] RISCV: Fix local_eliminate_vsetvl_insn bug in VSETVL PASS[PR110560]

2023-07-07 Thread Li Xu
This patch fixes an issue that happens on GCC-13.
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110560

This patch should be backported to GCC-13.
GCC-14 has rewritten this function, so there is no issue.

PR target/110560

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc (local_eliminate_vsetvl_insn): Fix bug.
---
 gcc/config/riscv/riscv-vsetvl.cc | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 35403b5679c..3355ca4e3fb 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1078,6 +1078,10 @@ local_eliminate_vsetvl_insn (const vector_insn_info )
 
  if (has_vtype_op (i->rtl ()))
{
+ if (!PREV_INSN (i->rtl ()))
+   return;
+ if (!NONJUMP_INSN_P (PREV_INSN (i->rtl (
+   return;
  if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl (
return;
  rtx avl = get_avl (i->rtl ());
-- 
2.17.1



RE: [r14-2314 Regression] FAIL: gcc.target/i386/pr100711-2.c scan-assembler-times vpandn 8 on Linux/x86_64

2023-07-07 Thread Jiang, Haochen via Gcc-patches
> -Original Message-
> From: Hongtao Liu 
> Sent: Friday, July 7, 2023 3:55 PM
> To: Beulich, Jan 
> Cc: haochen.jiang ; Jiang, Haochen
> ; gcc-regress...@gcc.gnu.org; gcc-
> patc...@gcc.gnu.org; Liu, Hongtao 
> Subject: Re: [r14-2314 Regression] FAIL: gcc.target/i386/pr100711-2.c scan-
> assembler-times vpandn 8 on Linux/x86_64
> 
> On Fri, Jul 7, 2023 at 3:50 PM Hongtao Liu  wrote:
> >
> > On Fri, Jul 7, 2023 at 3:50 PM Jan Beulich  wrote:
> > >
> > > On 07.07.2023 09:46, Hongtao Liu wrote:
> > > > On Fri, Jul 7, 2023 at 3:18 PM Jan Beulich via Gcc-regression
> > > >  wrote:
> > > >>
> > > >> On 06.07.2023 13:57, haochen.jiang wrote:
> > > >>> On Linux/x86_64,
> > > >>>
> > > >>> e007369c8b67bcabd57c4fed8cff2a6db82e78e6 is the first bad commit
> > > >>> commit e007369c8b67bcabd57c4fed8cff2a6db82e78e6
> > > >>> Author: Jan Beulich 
> > > >>> Date:   Wed Jul 5 09:49:16 2023 +0200
> > > >>>
> > > >>> x86: yet more PR target/100711-like splitting
> > > >>>
> > > >>> caused
> > > >>>
> > > >>> FAIL: gcc.target/i386/pr100711-1.c scan-assembler-times pandn 2
> > > >>> FAIL: gcc.target/i386/pr100711-2.c scan-assembler-times vpandn 8
> > > >>
> > > >> I expect the same applies here - -mno-avx512f (or -mno-avx512vl)
> > > >> might
> > > > For this one, we can just add -mno-avx512f to the testcase,it aims
> > > > to optimize pandn for avx2 target.
> > > >> address this failure. But whether that's really the way to go I'm
> > > >> not sure of. Plus of course such adjustments should have been
> > > >> done ahead of time, when it was decided that testing with certain
> > > >> -march= settings is a goal. My changes have merely uncovered the
> prior omissions.
> > > > It's not a standard request, it's just our private tester which is
> > > > used to find gcc bugs and miss-optimizations.
> > > > It sometimes generates false positive reports (usually adding
> > > > -mno-avx512f to the testcase can fix that), hope that's not too
> > > > annoying.
> > >
> > > Wouldn't that then better be done once uniformly for all affected
> > > tests, rather than being discovered piecemeal?
> This also prevents us from finding potential problems.

Yes, -march=cascadelake actually enables AVX512F-related features. This sometimes
exposes potential problems, while at other times it produces false positives.

I will add a hint in the script email.

Thx,
Haochen

> > >
> > > Anyway, in this case: Since you said you'd take care of the other
> > > test, will/can you do so for the two ones here as well, or am I on the 
> > > hook?
> > I'll do that.
> > >
> > > Jan
> >
> >
> >
> > --
> > BR,
> > Hongtao
> 
> 
> 
> --
> BR,
> Hongtao


[COMMITTED] Implement value/mask tracking for irange.

2023-07-07 Thread Aldy Hernandez via Gcc-patches
Integer ranges (irange) currently track known 0 bits.  We've wanted to
track known 1 bits for some time, and instead of tracking known 0 and
known 1's separately, it has been suggested we track a value/mask pair
similarly to what we do for CCP and RTL.  This patch implements such a
thing.

With this we now track a VALUE integer which holds the known bit values, and
a MASK which tells us which bits contain meaningful information.  This
allows us to fix a handful of enhancement requests, such as PR107043
and PR107053.

There is a 4.48% performance penalty for VRP and 0.42% in overall
compilation for this entire patchset.  It is expected and in line
with the loss incurred when we started tracking known 0 bits.

This patch just provides the value/mask tracking support.  All the
nonzero users (range-op, IPA, CCP, etc), are still using the nonzero
nomenclature.  For that matter, this patch reimplements the nonzero
accessors with the value/mask functionality.  In follow-up patches I
will enhance these passes to use the value/mask information, and
fix the aforementioned PRs.

gcc/ChangeLog:

* data-streamer-in.cc (streamer_read_value_range): Adjust for
value/mask.
* data-streamer-out.cc (streamer_write_vrange): Same.
* range-op.cc (operator_cast::fold_range): Same.
* value-range-pretty-print.cc
(vrange_printer::print_irange_bitmasks): Same.
* value-range-storage.cc (irange_storage::write_lengths_address):
Same.
(irange_storage::set_irange): Same.
(irange_storage::get_irange): Same.
(irange_storage::size): Same.
(irange_storage::dump): Same.
* value-range-storage.h: Same.
* value-range.cc (debug): New.
(irange_bitmask::dump): New.
(add_vrange): Adjust for value/mask.
(irange::operator=): Same.
(irange::set): Same.
(irange::verify_range): Same.
(irange::operator==): Same.
(irange::contains_p): Same.
(irange::irange_single_pair_union): Same.
(irange::union_): Same.
(irange::intersect): Same.
(irange::invert): Same.
(irange::get_nonzero_bits_from_range): Rename to...
(irange::get_bitmask_from_range): ...this.
(irange::set_range_from_nonzero_bits): Rename to...
(irange::set_range_from_bitmask): ...this.
(irange::set_nonzero_bits): Rename to...
(irange::update_bitmask): ...this.
(irange::get_nonzero_bits): Rename to...
(irange::get_bitmask): ...this.
(irange::intersect_nonzero_bits): Rename to...
(irange::intersect_bitmask): ...this.
(irange::union_nonzero_bits): Rename to...
(irange::union_bitmask): ...this.
(irange_bitmask::verify_mask): New.
* value-range.h (class irange_bitmask): New.
(irange_bitmask::set_unknown): New.
(irange_bitmask::unknown_p): New.
(irange_bitmask::irange_bitmask): New.
(irange_bitmask::get_precision): New.
(irange_bitmask::get_nonzero_bits): New.
(irange_bitmask::set_nonzero_bits): New.
(irange_bitmask::operator==): New.
(irange_bitmask::union_): New.
(irange_bitmask::intersect): New.
(class irange): Friend vrange_printer.
(irange::varying_compatible_p): Adjust for bitmask.
(irange::set_varying): Same.
(irange::set_nonzero): Same.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr107009.c: Adjust irange dumping for
value/mask changes.
* gcc.dg/tree-ssa/vrp-unreachable.c: Same.
* gcc.dg/tree-ssa/vrp122.c: Same.
---
 gcc/data-streamer-in.cc   |   6 +-
 gcc/data-streamer-out.cc  |   5 +-
 gcc/range-op.cc   |  16 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr107009.c  |   2 +-
 .../gcc.dg/tree-ssa/vrp-unreachable.c |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/vrp122.c|   2 +-
 gcc/value-range-pretty-print.cc   |  11 +-
 gcc/value-range-storage.cc|  26 +-
 gcc/value-range-storage.h |   2 +-
 gcc/value-range.cc| 248 +++---
 gcc/value-range.h | 153 ++-
 11 files changed, 351 insertions(+), 122 deletions(-)

diff --git a/gcc/data-streamer-in.cc b/gcc/data-streamer-in.cc
index 578c328475f..6e36adc73cc 100644
--- a/gcc/data-streamer-in.cc
+++ b/gcc/data-streamer-in.cc
@@ -241,8 +241,10 @@ streamer_read_value_range (class lto_input_block *ib, 
data_in *data_in,
  int_range<2> tmp (type, lb, ub);
  r.union_ (tmp);
}
-  wide_int nz = streamer_read_wide_int (ib);
-  r.set_nonzero_bits (nz);
+  wide_int value = streamer_read_wide_int (ib);
+  wide_int mask = streamer_read_wide_int (ib);
+  irange_bitmask bm (value, mask);
+  r.update_bitmask (bm);
   return;
 }
   if (is_a  (vr))
diff --git a/gcc/data-streamer-out.cc 

[COMMITTED] The caller to irange::intersect (wide_int, wide_int) must normalize the range.

2023-07-07 Thread Aldy Hernandez via Gcc-patches
Per the function comment, the caller to intersect(wide_int, wide_int)
must handle the mask.  This means it must also normalize the range if
anything changed.

gcc/ChangeLog:

* value-range.cc (irange::intersect): Leave normalization to
caller.
---
 gcc/value-range.cc | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index 8e5607a7eeb..fbc0c7a6f82 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -1475,6 +1475,8 @@ irange::intersect (const vrange )
return true;
 
   res |= intersect_bitmask (r);
+  if (res)
+   normalize_kind ();
   return res;
 }
 
@@ -1574,7 +1576,7 @@ irange::intersect (const vrange )
 // Multirange intersect for a specified wide_int [lb, ub] range.
 // Return TRUE if intersect changed anything.
 //
-// NOTE: It is the caller's responsibility to intersect the nonzero masks.
+// NOTE: It is the caller's responsibility to intersect the mask.
 
 bool
 irange::intersect (const wide_int& lb, const wide_int& ub)
@@ -1633,7 +1635,8 @@ irange::intersect (const wide_int& lb, const wide_int& ub)
 }
 
   m_kind = VR_RANGE;
-  normalize_kind ();
+  // The caller must normalize and verify the range, as the bitmask
+  // still needs to be handled.
   return true;
 }
 
-- 
2.40.1



[COMMITTED] A singleton irange has all known bits.

2023-07-07 Thread Aldy Hernandez via Gcc-patches
gcc/ChangeLog:

* value-range.cc (irange::get_bitmask_from_range): Return all the
known bits for a singleton.
(irange::set_range_from_bitmask): Set a range of a singleton when
all bits are known.
---
 gcc/value-range.cc | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index fbc0c7a6f82..011bdbdeae6 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -1766,10 +1766,19 @@ irange::invert ()
 irange_bitmask
 irange::get_bitmask_from_range () const
 {
+  unsigned prec = TYPE_PRECISION (type ());
   wide_int min = lower_bound ();
   wide_int max = upper_bound ();
+
+  // All the bits of a singleton are known.
+  if (min == max)
+{
+  wide_int mask = wi::zero (prec);
+  wide_int value = lower_bound ();
+  return irange_bitmask (value, mask);
+}
+
   wide_int xorv = min ^ max;
-  unsigned prec = TYPE_PRECISION (type ());
 
   if (xorv != 0)
 xorv = wi::mask (prec - wi::clz (xorv), false, prec);
@@ -1786,6 +1795,14 @@ irange::set_range_from_bitmask ()
   gcc_checking_assert (!undefined_p ());
   if (m_bitmask.unknown_p ())
 return false;
+
+  // If all the bits are known, this is a singleton.
+  if (m_bitmask.mask () == 0)
+{
+  set (m_type, m_bitmask.value (), m_bitmask.value ());
+  return true;
+}
+
   unsigned popcount = wi::popcount (m_bitmask.get_nonzero_bits ());
 
   // If we have only one bit set in the mask, we can figure out the
-- 
2.40.1



Re: [r14-2314 Regression] FAIL: gcc.target/i386/pr100711-2.c scan-assembler-times vpandn 8 on Linux/x86_64

2023-07-07 Thread Hongtao Liu via Gcc-patches
On Fri, Jul 7, 2023 at 3:50 PM Hongtao Liu  wrote:
>
> On Fri, Jul 7, 2023 at 3:50 PM Jan Beulich  wrote:
> >
> > On 07.07.2023 09:46, Hongtao Liu wrote:
> > > On Fri, Jul 7, 2023 at 3:18 PM Jan Beulich via Gcc-regression
> > >  wrote:
> > >>
> > >> On 06.07.2023 13:57, haochen.jiang wrote:
> > >>> On Linux/x86_64,
> > >>>
> > >>> e007369c8b67bcabd57c4fed8cff2a6db82e78e6 is the first bad commit
> > >>> commit e007369c8b67bcabd57c4fed8cff2a6db82e78e6
> > >>> Author: Jan Beulich 
> > >>> Date:   Wed Jul 5 09:49:16 2023 +0200
> > >>>
> > >>> x86: yet more PR target/100711-like splitting
> > >>>
> > >>> caused
> > >>>
> > >>> FAIL: gcc.target/i386/pr100711-1.c scan-assembler-times pandn 2
> > >>> FAIL: gcc.target/i386/pr100711-2.c scan-assembler-times vpandn 8
> > >>
> > >> I expect the same applies here - -mno-avx512f (or -mno-avx512vl) might
> > > For this one, we can just add -mno-avx512f to the testcase,it aims to
> > > optimize pandn for avx2 target.
> > >> address this failure. But whether that's really the way to go I'm not
> > >> sure of. Plus of course such adjustments should have been done ahead
> > >> of time, when it was decided that testing with certain -march= settings
> > >> is a goal. My changes have merely uncovered the prior omissions.
> > > It's not a standard request, it's just our private tester which is
> > > used to find gcc bugs and miss-optimizations.
> > > It sometimes generates false positive reports (usually adding
> > > -mno-avx512f to the testcase can fix that), hope that's not too
> > > annoying.
> >
> > Wouldn't that then better be done once uniformly for all affected tests,
> > rather than being discovered piecemeal?
This also prevents us from finding potential problems.
> >
> > Anyway, in this case: Since you said you'd take care of the other test,
> > will/can you do so for the two ones here as well, or am I on the hook?
> I'll do that.
> >
> > Jan
>
>
>
> --
> BR,
> Hongtao



-- 
BR,
Hongtao


Re: [PATCH] Vect: use a small step to calculate induction for the unrolled loop (PR tree-optimization/110449)

2023-07-07 Thread Richard Sandiford via Gcc-patches
Richard Biener  writes:
>> Am 06.07.2023 um 19:50 schrieb Richard Sandiford :
>> 
>> Richard Biener via Gcc-patches  writes:
 On Wed, Jul 5, 2023 at 8:44 AM Hao Liu OS via Gcc-patches
  wrote:
 
 Hi,
 
 If a loop is unrolled by n times during vectorization, two steps are used to
 calculate the induction variable:
  - The small step for the unrolled ith-copy: vec_1 = vec_iv + (VF/n * Step)
  - The large step for the whole loop: vec_loop = vec_iv + (VF * Step)
 
 This patch calculates an extra vec_n to replace vec_loop:
  vec_n = vec_prev + (VF/n * S) = vec_iv + (VF/n * S) * n = vec_loop.
 
 So that we can save the large step register and related operations.
>>> 
>>> OK.  It would be nice to avoid the dead stmts created earlier though.
>> 
>> FWIW, I still don't think we should do this.  Part of the point of
>> unrolling is to shorten loop-carried dependencies, whereas this patch
>> is going in the opposite direction.
>
> Note ncopies can be >1 without additional unrolling.

Yeah, true.  But I think even there, avoiding a longer loop-carried
dependency should be a good thing.

> With non VLA vectors all of the updates will be constant folded btw.

Are you sure?  The motivating example is an Advanced SIMD one,
not a VLA one.  No variable-length vectors are involved.

Maybe constant folding caps the dependency chain to length 2?
But 2 is still more than 1. :)

Thanks,
Richard

>
> Richard 
>
>> Richard
>> 
>>> 
>>> Thanks,
>>> Richard.
>>> 
 gcc/ChangeLog:
 
PR tree-optimization/110449
* tree-vect-loop.cc (vectorizable_induction): use vec_n to replace
vec_loop for the unrolled loop.
 
 gcc/testsuite/ChangeLog:
 
* gcc.target/aarch64/pr110449.c: New testcase.
 ---
 gcc/testsuite/gcc.target/aarch64/pr110449.c | 40 +
 gcc/tree-vect-loop.cc   | 21 +--
 2 files changed, 58 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr110449.c
 
 diff --git a/gcc/testsuite/gcc.target/aarch64/pr110449.c 
 b/gcc/testsuite/gcc.target/aarch64/pr110449.c
 new file mode 100644
 index 000..bb3b6dcfe08
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/aarch64/pr110449.c
 @@ -0,0 +1,40 @@
 +/* { dg-do compile } */
 +/* { dg-options "-Ofast -mcpu=neoverse-n2 --param 
 aarch64-vect-unroll-limit=2" } */
 +/* { dg-final { scan-assembler-not "8.0e\\+0" } } */
 +
 +/* Calcualte the vectorized induction with smaller step for an unrolled 
 loop.
 +
 +   before (suggested_unroll_factor=2):
 + fmovs30, 8.0e+0
 + fmovs31, 4.0e+0
 + dup v27.4s, v30.s[0]
 + dup v28.4s, v31.s[0]
 + .L6:
 + mov v30.16b, v31.16b
 + faddv31.4s, v31.4s, v27.4s
 + faddv29.4s, v30.4s, v28.4s
 + stp q30, q29, [x0]
 + add x0, x0, 32
 + cmp x1, x0
 + bne .L6
 +
 +   after:
 + fmovs31, 4.0e+0
 + dup v29.4s, v31.s[0]
 + .L6:
 + faddv30.4s, v31.4s, v29.4s
 + stp q31, q30, [x0]
 + add x0, x0, 32
 + faddv31.4s, v29.4s, v30.4s
 + cmp x0, x1
 + bne .L6  */
 +
 +void
 +foo2 (float *arr, float freq, float step)
 +{
 +  for (int i = 0; i < 1024; i++)
 +{
 +  arr[i] = freq;
 +  freq += step;
 +}
 +}
 diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
 index 3b46c58a8d8..706ecbffd0c 100644
 --- a/gcc/tree-vect-loop.cc
 +++ b/gcc/tree-vect-loop.cc
 @@ -10114,7 +10114,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
   new_vec, step_vectype, NULL);
 
   vec_def = induc_def;
 -  for (i = 1; i < ncopies; i++)
 +  for (i = 1; i < ncopies + 1; i++)
{
  /* vec_i = vec_prev + vec_step  */
  gimple_seq stmts = NULL;
 @@ -10124,8 +10124,23 @@ vectorizable_induction (loop_vec_info loop_vinfo,
  vec_def = gimple_convert (, vectype, vec_def);
 
  gsi_insert_seq_before (, stmts, GSI_SAME_STMT);
 - new_stmt = SSA_NAME_DEF_STMT (vec_def);
 - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
 + if (i < ncopies)
 +   {
 + new_stmt = SSA_NAME_DEF_STMT (vec_def);
 + STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
 +   }
 + else
 +   {
 + /* vec_1 = vec_iv + (VF/n * S)
 +vec_2 = vec_1 + (VF/n * S)
 +...
 +vec_n = vec_prev + (VF/n * S) = vec_iv + VF * S = vec_loop
 +

[PATCH] testsuite: Add _link flavor for several arm_arch* and arm* effective-targets

2023-07-07 Thread Christophe Lyon via Gcc-patches
For arm targets, we generate many effective-targets with
check_effective_target_FUNC_multilib and
check_effective_target_arm_arch_FUNC_multilib which check if we can
link and execute a simple program with a given set of flags/multilibs.

In some cases however, it's possible to link but not to execute a
program, so this patch adds similar _link effective-targets which only
check if link succeeds.

The patch does not update the documentation as it already lacks the
numerous existing related effective-targets.

2023-07-07  Christophe Lyon  

gcc/testsuite/
* lib/target-supports.exp (arm_*FUNC_link): New effective-targets.
---
 gcc/testsuite/lib/target-supports.exp | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index c04db2be7f9..d33bc077418 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -5129,6 +5129,14 @@ foreach { armfunc armflag armdefs } {
return "$flags FLAG"
}
 
+proc check_effective_target_arm_arch_FUNC_link { } {
+   return [check_no_compiler_messages arm_arch_FUNC_link executable {
+   #include 
+   int dummy;
+   int main (void) { return 0; }
+   } [add_options_for_arm_arch_FUNC ""]]
+   }
+
proc check_effective_target_arm_arch_FUNC_multilib { } {
return [check_runtime arm_arch_FUNC_multilib {
int
@@ -5906,6 +5914,7 @@ proc add_options_for_arm_v8_2a_bf16_neon { flags } {
 #   arm_v8m_main_cde: Armv8-m CDE (Custom Datapath Extension).
 #   arm_v8m_main_cde_fp: Armv8-m CDE with FP registers.
 #   arm_v8_1m_main_cde_mve: Armv8.1-m CDE with MVE.
+#   arm_v8_1m_main_cde_mve_fp: Armv8.1-m CDE with MVE with FP support.
 # Usage:
 #   /* { dg-require-effective-target arm_v8m_main_cde_ok } */
 #   /* { dg-add-options arm_v8m_main_cde } */
@@ -5965,6 +5974,24 @@ foreach { armfunc armflag armdef arminc } {
return "$flags $et_FUNC_flags"
}
 
+proc check_effective_target_FUNC_link { } {
+   if { ! [check_effective_target_FUNC_ok] } {
+   return 0;
+   }
+   return [check_no_compiler_messages FUNC_link executable {
+   #if !(DEF)
+   #error "DEF failed"
+   #endif
+   #include 
+   INC
+   int
+   main (void)
+   {
+   return 0;
+   }
+   } [add_options_for_FUNC ""]]
+   }
+
proc check_effective_target_FUNC_multilib { } {
if { ! [check_effective_target_FUNC_ok] } {
return 0;
-- 
2.34.1



[PATCH] doc: Document arm_v8_1m_main_cde_mve_fp

2023-07-07 Thread Christophe Lyon via Gcc-patches
The arm_v8_1m_main_cde_mve_fp family of effective targets was not
documented when it was introduced.

2023-07-07  Christophe Lyon  

gcc/
* doc/sourcebuild.texi (arm_v8_1m_main_cde_mve_fp): Document.
---
 gcc/doc/sourcebuild.texi | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 526020c7511..03fb2394705 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2190,6 +2190,12 @@ ARM target supports options to generate instructions 
from ARMv8.1-M with
 the Custom Datapath Extension (CDE) and M-Profile Vector Extension (MVE).
 Some multilibs may be incompatible with these options.
 
+@item arm_v8_1m_main_cde_mve_fp
+ARM target supports options to generate instructions from ARMv8.1-M
+with the Custom Datapath Extension (CDE) and M-Profile Vector
+Extension (MVE) with floating-point support.  Some multilibs may be
+incompatible with these options.
+
 @item arm_pacbti_hw
 Test system supports executing Pointer Authentication and Branch Target
 Identification instructions.
-- 
2.34.1



Re: [r14-2314 Regression] FAIL: gcc.target/i386/pr100711-2.c scan-assembler-times vpandn 8 on Linux/x86_64

2023-07-07 Thread Hongtao Liu via Gcc-patches
On Fri, Jul 7, 2023 at 3:50 PM Jan Beulich  wrote:
>
> On 07.07.2023 09:46, Hongtao Liu wrote:
> > On Fri, Jul 7, 2023 at 3:18 PM Jan Beulich via Gcc-regression
> >  wrote:
> >>
> >> On 06.07.2023 13:57, haochen.jiang wrote:
> >>> On Linux/x86_64,
> >>>
> >>> e007369c8b67bcabd57c4fed8cff2a6db82e78e6 is the first bad commit
> >>> commit e007369c8b67bcabd57c4fed8cff2a6db82e78e6
> >>> Author: Jan Beulich 
> >>> Date:   Wed Jul 5 09:49:16 2023 +0200
> >>>
> >>> x86: yet more PR target/100711-like splitting
> >>>
> >>> caused
> >>>
> >>> FAIL: gcc.target/i386/pr100711-1.c scan-assembler-times pandn 2
> >>> FAIL: gcc.target/i386/pr100711-2.c scan-assembler-times vpandn 8
> >>
> >> I expect the same applies here - -mno-avx512f (or -mno-avx512vl) might
> > For this one, we can just add -mno-avx512f to the testcase,it aims to
> > optimize pandn for avx2 target.
> >> address this failure. But whether that's really the way to go I'm not
> >> sure of. Plus of course such adjustments should have been done ahead
> >> of time, when it was decided that testing with certain -march= settings
> >> is a goal. My changes have merely uncovered the prior omissions.
> > It's not a standard request, it's just our private tester which is
> > used to find gcc bugs and miss-optimizations.
> > It sometimes generates false positive reports (usually adding
> > -mno-avx512f to the testcase can fix that), hope that's not too
> > annoying.
>
> Wouldn't that then better be done once uniformly for all affected tests,
> rather than being discovered piecemeal?
>
> Anyway, in this case: Since you said you'd take care of the other test,
> will/can you do so for the two ones here as well, or am I on the hook?
I'll do that.
>
> Jan



-- 
BR,
Hongtao


Re: [r14-2314 Regression] FAIL: gcc.target/i386/pr100711-2.c scan-assembler-times vpandn 8 on Linux/x86_64

2023-07-07 Thread Jan Beulich via Gcc-patches
On 07.07.2023 09:46, Hongtao Liu wrote:
> On Fri, Jul 7, 2023 at 3:18 PM Jan Beulich via Gcc-regression
>  wrote:
>>
>> On 06.07.2023 13:57, haochen.jiang wrote:
>>> On Linux/x86_64,
>>>
>>> e007369c8b67bcabd57c4fed8cff2a6db82e78e6 is the first bad commit
>>> commit e007369c8b67bcabd57c4fed8cff2a6db82e78e6
>>> Author: Jan Beulich 
>>> Date:   Wed Jul 5 09:49:16 2023 +0200
>>>
>>> x86: yet more PR target/100711-like splitting
>>>
>>> caused
>>>
>>> FAIL: gcc.target/i386/pr100711-1.c scan-assembler-times pandn 2
>>> FAIL: gcc.target/i386/pr100711-2.c scan-assembler-times vpandn 8
>>
>> I expect the same applies here - -mno-avx512f (or -mno-avx512vl) might
> For this one, we can just add -mno-avx512f to the testcase,it aims to
> optimize pandn for avx2 target.
>> address this failure. But whether that's really the way to go I'm not
>> sure of. Plus of course such adjustments should have been done ahead
>> of time, when it was decided that testing with certain -march= settings
>> is a goal. My changes have merely uncovered the prior omissions.
> It's not a standard request, it's just our private tester which is
> used to find gcc bugs and miss-optimizations.
> It sometimes generates false positive reports (usually adding
> -mno-avx512f to the testcase can fix that), hope that's not too
> annoying.

Wouldn't that then better be done once uniformly for all affected tests,
rather than being discovered piecemeal?

Anyway, in this case: Since you said you'd take care of the other test,
will/can you do so for the two ones here as well, or am I on the hook?

Jan


Re: [r14-2310 Regression] FAIL: gcc.target/i386/pr53652-1.c scan-assembler-times pandn[ \\t] 2 on Linux/x86_64

2023-07-07 Thread Hongtao Liu via Gcc-patches
On Fri, Jul 7, 2023 at 3:34 PM Jan Beulich  wrote:
>
> On 07.07.2023 09:30, Hongtao Liu wrote:
> > On Fri, Jul 7, 2023 at 3:13 PM Jan Beulich via Gcc-regression
> >  wrote:
> >>
> >> On 06.07.2023 13:57, haochen.jiang wrote:
> >>> On Linux/x86_64,
> >>>
> >>> 2d11c99dfca3cc603dbbfafb3afc41689a68e40f is the first bad commit
> >>> commit 2d11c99dfca3cc603dbbfafb3afc41689a68e40f
> >>> Author: Jan Beulich 
> >>> Date:   Wed Jul 5 09:41:09 2023 +0200
> >>>
> >>> x86: use VPTERNLOG also for certain andnot forms
> >>>
> >>> caused
> >>>
> >>> FAIL: gcc.target/i386/pr53652-1.c scan-assembler-not vpternlogq[ \\t]
> >>
> >> The respective expectation was never valid to add without excluding
> >> cases where -march= overrides (extends) the -msse2 that the test
> >> specifies explicitly. I'm afraid I don't know how to tweak a testcase
> >> to properly deal with that. Perhaps (like iirc was suggested elsewhere)
> >> -mno-avx512f, but honestly this approach feels clumsy to me. Cc-ing
> >> Hongtao, who I think suggested that approach elsewhere.
> >>
> >>> FAIL: gcc.target/i386/pr53652-1.c scan-assembler-times pandn[ \\t] 2
> > There're a false dependence when using pternlog for andnot(and other
> > newly added) pattern, i'm working on a patch to avoid that(PR110438).
> > Let me handle the test case.
>
> Of course I'm happy to see you handle the testcase, but if you don't
> mind I'm curious towards the connection you see between that false
> dependency issue and the adjustments missing in this (and other)
> testcase(s).
For the sake of simplicity, adding -mno-avx512f should be ok; the
testcase is used to detect optimization on non-avx512 targets.
I'll add extra testcases to cover the false dependence case.
>
> Jan



-- 
BR,
Hongtao


Re: [r14-2314 Regression] FAIL: gcc.target/i386/pr100711-2.c scan-assembler-times vpandn 8 on Linux/x86_64

2023-07-07 Thread Hongtao Liu via Gcc-patches
On Fri, Jul 7, 2023 at 3:18 PM Jan Beulich via Gcc-regression
 wrote:
>
> On 06.07.2023 13:57, haochen.jiang wrote:
> > On Linux/x86_64,
> >
> > e007369c8b67bcabd57c4fed8cff2a6db82e78e6 is the first bad commit
> > commit e007369c8b67bcabd57c4fed8cff2a6db82e78e6
> > Author: Jan Beulich 
> > Date:   Wed Jul 5 09:49:16 2023 +0200
> >
> > x86: yet more PR target/100711-like splitting
> >
> > caused
> >
> > FAIL: gcc.target/i386/pr100711-1.c scan-assembler-times pandn 2
> > FAIL: gcc.target/i386/pr100711-2.c scan-assembler-times vpandn 8
>
> I expect the same applies here - -mno-avx512f (or -mno-avx512vl) might
For this one, we can just add -mno-avx512f to the testcase; it aims to
optimize pandn for the avx2 target.
> address this failure. But whether that's really the way to go I'm not
> sure of. Plus of course such adjustments should have been done ahead
> of time, when it was decided that testing with certain -march= settings
> is a goal. My changes have merely uncovered the prior omissions.
It's not a standard request, it's just our private tester which is
used to find gcc bugs and missed optimizations.
It sometimes generates false positive reports (usually adding
-mno-avx512f to the testcase can fix that), hope that's not too
annoying.
>
> Jan
>
> > with GCC configured with
> >
> > ../../gcc/configure 
> > --prefix=/export/users/haochenj/src/gcc-bisect/master/master/r14-2314/usr 
> > --enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
> > --with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet 
> > --without-isl --enable-libmpx x86_64-linux --disable-bootstrap
> >
> > To reproduce:
> >
> > $ cd {build_dir}/gcc && make check 
> > RUNTESTFLAGS="i386.exp=gcc.target/i386/pr100711-1.c 
> > --target_board='unix{-m32\ -march=cascadelake}'"
> > $ cd {build_dir}/gcc && make check 
> > RUNTESTFLAGS="i386.exp=gcc.target/i386/pr100711-2.c 
> > --target_board='unix{-m32\ -march=cascadelake}'"
> >
> > (Please do not reply to this email, for question about this report, contact 
> > me at haochen dot jiang at intel.com)
>


-- 
BR,
Hongtao


Re: [r14-2310 Regression] FAIL: gcc.target/i386/pr53652-1.c scan-assembler-times pandn[ \\t] 2 on Linux/x86_64

2023-07-07 Thread Jan Beulich via Gcc-patches
On 07.07.2023 09:30, Hongtao Liu wrote:
> On Fri, Jul 7, 2023 at 3:13 PM Jan Beulich via Gcc-regression
>  wrote:
>>
>> On 06.07.2023 13:57, haochen.jiang wrote:
>>> On Linux/x86_64,
>>>
>>> 2d11c99dfca3cc603dbbfafb3afc41689a68e40f is the first bad commit
>>> commit 2d11c99dfca3cc603dbbfafb3afc41689a68e40f
>>> Author: Jan Beulich 
>>> Date:   Wed Jul 5 09:41:09 2023 +0200
>>>
>>> x86: use VPTERNLOG also for certain andnot forms
>>>
>>> caused
>>>
>>> FAIL: gcc.target/i386/pr53652-1.c scan-assembler-not vpternlogq[ \\t]
>>
>> The respective expectation was never valid to add without excluding
>> cases where -march= overrides (extends) the -msse2 that the test
>> specifies explicitly. I'm afraid I don't know how to tweak a testcase
>> to properly deal with that. Perhaps (like iirc was suggested elsewhere)
>> -mno-avx512f, but honestly this approach feels clumsy to me. Cc-ing
>> Hongtao, who I think suggested that approach elsewhere.
>>
>>> FAIL: gcc.target/i386/pr53652-1.c scan-assembler-times pandn[ \\t] 2
> There're a false dependence when using pternlog for andnot(and other
> newly added) pattern, i'm working on a patch to avoid that(PR110438).
> Let me handle the test case.

Of course I'm happy to see you handle the testcase, but if you don't
mind I'm curious towards the connection you see between that false
dependency issue and the adjustments missing in this (and other)
testcase(s).

Jan


Re: [r14-2310 Regression] FAIL: gcc.target/i386/pr53652-1.c scan-assembler-times pandn[ \\t] 2 on Linux/x86_64

2023-07-07 Thread Hongtao Liu via Gcc-patches
On Fri, Jul 7, 2023 at 3:13 PM Jan Beulich via Gcc-regression
 wrote:
>
> On 06.07.2023 13:57, haochen.jiang wrote:
> > On Linux/x86_64,
> >
> > 2d11c99dfca3cc603dbbfafb3afc41689a68e40f is the first bad commit
> > commit 2d11c99dfca3cc603dbbfafb3afc41689a68e40f
> > Author: Jan Beulich 
> > Date:   Wed Jul 5 09:41:09 2023 +0200
> >
> > x86: use VPTERNLOG also for certain andnot forms
> >
> > caused
> >
> > FAIL: gcc.target/i386/pr53652-1.c scan-assembler-not vpternlogq[ \\t]
>
> The respective expectation was never valid to add without excluding
> cases where -march= overrides (extends) the -msse2 that the test
> specifies explicitly. I'm afraid I don't know how to tweak a testcase
> to properly deal with that. Perhaps (like iirc was suggested elsewhere)
> -mno-avx512f, but honestly this approach feels clumsy to me. Cc-ing
> Hongtao, who I think suggested that approach elsewhere.
>
> > FAIL: gcc.target/i386/pr53652-1.c scan-assembler-times pandn[ \\t] 2
There's a false dependence when using pternlog for the andnot (and other
newly added) patterns; I'm working on a patch to avoid that (PR110438).
Let me handle the test case.
>
> Aiui this is merely a knock-on effect.
>
> Jan
>
> > with GCC configured with
> >
> > ../../gcc/configure 
> > --prefix=/export/users/haochenj/src/gcc-bisect/master/master/r14-2310/usr 
> > --enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
> > --with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet 
> > --without-isl --enable-libmpx x86_64-linux --disable-bootstrap
> >
> > To reproduce:
> >
> > $ cd {build_dir}/gcc && make check 
> > RUNTESTFLAGS="i386.exp=gcc.target/i386/pr53652-1.c 
> > --target_board='unix{-m32\ -march=cascadelake}'"
> > $ cd {build_dir}/gcc && make check 
> > RUNTESTFLAGS="i386.exp=gcc.target/i386/pr53652-1.c 
> > --target_board='unix{-m64\ -march=cascadelake}'"
> >
> > (Please do not reply to this email, for question about this report, contact 
> > me at haochen dot jiang at intel.com)
>


-- 
BR,
Hongtao


Re: [r14-2314 Regression] FAIL: gcc.target/i386/pr100711-2.c scan-assembler-times vpandn 8 on Linux/x86_64

2023-07-07 Thread Jan Beulich via Gcc-patches
On 06.07.2023 13:57, haochen.jiang wrote:
> On Linux/x86_64,
> 
> e007369c8b67bcabd57c4fed8cff2a6db82e78e6 is the first bad commit
> commit e007369c8b67bcabd57c4fed8cff2a6db82e78e6
> Author: Jan Beulich 
> Date:   Wed Jul 5 09:49:16 2023 +0200
> 
> x86: yet more PR target/100711-like splitting
> 
> caused
> 
> FAIL: gcc.target/i386/pr100711-1.c scan-assembler-times pandn 2
> FAIL: gcc.target/i386/pr100711-2.c scan-assembler-times vpandn 8

I expect the same applies here - -mno-avx512f (or -mno-avx512vl) might
address this failure. But whether that's really the way to go I'm not
sure of. Plus of course such adjustments should have been done ahead
of time, when it was decided that testing with certain -march= settings
is a goal. My changes have merely uncovered the prior omissions.

Jan

> with GCC configured with
> 
> ../../gcc/configure 
> --prefix=/export/users/haochenj/src/gcc-bisect/master/master/r14-2314/usr 
> --enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
> --with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
> --enable-libmpx x86_64-linux --disable-bootstrap
> 
> To reproduce:
> 
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="i386.exp=gcc.target/i386/pr100711-1.c 
> --target_board='unix{-m32\ -march=cascadelake}'"
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="i386.exp=gcc.target/i386/pr100711-2.c 
> --target_board='unix{-m32\ -march=cascadelake}'"
> 
> (Please do not reply to this email, for question about this report, contact 
> me at haochen dot jiang at intel.com)



Re: [r14-2310 Regression] FAIL: gcc.target/i386/pr53652-1.c scan-assembler-times pandn[ \\t] 2 on Linux/x86_64

2023-07-07 Thread Jan Beulich via Gcc-patches
On 06.07.2023 13:57, haochen.jiang wrote:
> On Linux/x86_64,
> 
> 2d11c99dfca3cc603dbbfafb3afc41689a68e40f is the first bad commit
> commit 2d11c99dfca3cc603dbbfafb3afc41689a68e40f
> Author: Jan Beulich 
> Date:   Wed Jul 5 09:41:09 2023 +0200
> 
> x86: use VPTERNLOG also for certain andnot forms
> 
> caused
> 
> FAIL: gcc.target/i386/pr53652-1.c scan-assembler-not vpternlogq[ \\t]

The respective expectation was never valid to add without excluding
cases where -march= overrides (extends) the -msse2 that the test
specifies explicitly. I'm afraid I don't know how to tweak a testcase
to properly deal with that. Perhaps (like iirc was suggested elsewhere)
-mno-avx512f, but honestly this approach feels clumsy to me. Cc-ing
Hongtao, who I think suggested that approach elsewhere.

> FAIL: gcc.target/i386/pr53652-1.c scan-assembler-times pandn[ \\t] 2

Aiui this is merely a knock-on effect.

Jan

> with GCC configured with
> 
> ../../gcc/configure 
> --prefix=/export/users/haochenj/src/gcc-bisect/master/master/r14-2310/usr 
> --enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
> --with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
> --enable-libmpx x86_64-linux --disable-bootstrap
> 
> To reproduce:
> 
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="i386.exp=gcc.target/i386/pr53652-1.c --target_board='unix{-m32\ 
> -march=cascadelake}'"
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="i386.exp=gcc.target/i386/pr53652-1.c --target_board='unix{-m64\ 
> -march=cascadelake}'"
> 
> (Please do not reply to this email, for question about this report, contact 
> me at haochen dot jiang at intel.com)



[PATCH] mklog: handle Signed-Off-By, minor cleanup

2023-07-07 Thread Marc Poulhiès via Gcc-patches
Consider Signed-off-by lines as part of the ending of the initial
commit to avoid having these in the middle of the log when the
changelog part is injected after.

This is particularly useful with:

 $ git gcc-commit-mklog --amend -s

that can be used to create the changelog and add the Signed-Off-By line.

Also applies most of the shellcheck suggestions on the
prepare-commit-msg hook.

Signed-off-by: Marc Poulhiès 
---

This command is used in particular during the dev of the frontend
for the Rust language (see r13-7099-g4b25fc15b925f8 as an example).

Ok for master?

 contrib/mklog.py   | 34 +-
 contrib/prepare-commit-msg | 20 ++--
 2 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/contrib/mklog.py b/contrib/mklog.py
index 777212c98d7..e5cc69e0d0a 100755
--- a/contrib/mklog.py
+++ b/contrib/mklog.py
@@ -41,7 +41,34 @@ from unidiff import PatchSet
 
 LINE_LIMIT = 100
 TAB_WIDTH = 8
-CO_AUTHORED_BY_PREFIX = 'co-authored-by: '
+
+# Initial commit:
+#   +--+
+#   | gccrs: Some title|
+#   |  | This is the "start"
+#   | This is some text explaining the commit. |
+#   | There can be several lines.  |
+#   |  |<--->
+#   | Signed-off-by: My Name  | This is the "end"
+#   +--+
+#
+# Results in:
+#   +--+
+#   | gccrs: Some title|
+#   |  |
+#   | This is some text explaining the commit. | This is the "start"
+#   | There can be several lines.  |
+#   |  |<--->
+#   | gcc/rust/ChangeLog:  |
+#   |  | This is the generated
+#   | * some_file (bla):   | ChangeLog part
+#   | (foo):   |
+#   |  |<--->
+#   | Signed-off-by: My Name  | This is the "end"
+#   +--+
+
+# this regex matches the first line of the "end" in the initial commit message
+FIRST_LINE_OF_END_RE = re.compile('(?i)^(signed-off-by|co-authored-by|#): ')
 
 pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?PPR [a-z+-]+\/[0-9]+)')
 prnum_regex = re.compile(r'PR (?P[a-z+-]+)/(?P[0-9]+)')
@@ -330,10 +357,7 @@ def update_copyright(data):
 
 
 def skip_line_in_changelog(line):
-if line.lower().startswith(CO_AUTHORED_BY_PREFIX) or line.startswith('#'):
-return False
-return True
-
+return FIRST_LINE_OF_END_RE.match(line) == None
 
 if __name__ == '__main__':
 extra_args = os.getenv('GCC_MKLOG_ARGS')
diff --git a/contrib/prepare-commit-msg b/contrib/prepare-commit-msg
index 48c9dad3c6f..1e94706ba40 100755
--- a/contrib/prepare-commit-msg
+++ b/contrib/prepare-commit-msg
@@ -32,11 +32,11 @@ if ! [ -f "$COMMIT_MSG_FILE" ]; then exit 0; fi
 # Don't do anything unless requested to.
 if [ -z "$GCC_FORCE_MKLOG" ]; then exit 0; fi
 
-if [ -z "$COMMIT_SOURCE" ] || [ $COMMIT_SOURCE = template ]; then
+if [ -z "$COMMIT_SOURCE" ] || [ "$COMMIT_SOURCE" = template ]; then
 # No source or "template" means new commit.
 cmd="diff --cached"
 
-elif [ $COMMIT_SOURCE = message ]; then
+elif [ "$COMMIT_SOURCE" = message ]; then
 # "message" means -m; assume a new commit if there are any changes staged.
 if ! git diff --cached --quiet; then
cmd="diff --cached"
@@ -44,23 +44,23 @@ elif [ $COMMIT_SOURCE = message ]; then
cmd="diff --cached HEAD^"
 fi
 
-elif [ $COMMIT_SOURCE = commit ]; then
+elif [ "$COMMIT_SOURCE" = commit ]; then
 # The message of an existing commit.  If it's HEAD, assume --amend;
 # otherwise, assume a new commit with -C.
-if [ $SHA1 = HEAD ]; then
+if [ "$SHA1" = HEAD ]; then
cmd="diff --cached HEAD^"
if [ "$(git config gcc-config.mklog-hook-type)" = "smart-amend" ]; then
# Check if the existing message still describes the staged changes.
f=$(mktemp /tmp/git-commit.XX) || exit 1
-   git log -1 --pretty=email HEAD > $f
-   printf '\n---\n\n' >> $f
-   git $cmd >> $f
+   git log -1 --pretty=email HEAD > "$f"
+   printf '\n---\n\n' >> "$f"
+   git $cmd >> "$f"
if contrib/gcc-changelog/git_email.py "$f" >/dev/null 2>&1; then
# Existing commit message is still OK for amended commit.
-   rm $f
+   rm "$f"
exit 0
fi
-   rm $f
+   rm "$f"
fi
 else
cmd="diff --cached"

Re: [PATCH] Break false dependence for vpternlog by inserting vpxor.

2023-07-07 Thread Hongtao Liu via Gcc-patches
On Thu, Jul 6, 2023 at 11:46 PM  wrote:
>
> > +; False dependency happens on destination register which is not really
> > +; used when moving all ones to vector register
> > +(define_split
> > +  [(set (match_operand:VMOVE 0 "register_operand")
> > + (match_operand:VMOVE 1 "int_float_vector_all_ones_operand"))]
> > +  "TARGET_AVX512F && reload_completed
> > +  && ( == 64 || EXT_REX_SSE_REG_P (operands[0]))"
> > +  [(set (match_dup 0) (match_dup 2))
> > +   (parallel
> > + [(set (match_dup 0) (match_dup 1))
> > +  (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
> > +  "operands[2] = CONST0_RTX (mode);")
>
> I think we shouldnt emit PXOR when optimizing for size. So should change
> define_split:
> define_split
>[(set (match_operand:VMOVE 0 "register_operand")
> (match_operand:VMOVE 1 "int_float_vector_all_ones_operand"))]
>"TARGET_AVX512F && reload_completed
>&& ( == 64 || EXT_REX_SSE_REG_P (operands[0]))
>&& optimize_insn_for_speed_p ()"
>[(set (match_dup 0) (match_dup 2))
> (parallel
>   [(set (match_dup 0) (match_dup 1))
>(unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
>"operands[2] = CONST0_RTX (mode);")
Yes, will do. I'm still working on breaking the false dependence for
pternlog in newly added pattern *iornot3,*xnor3 and
*3.
Will repost the patch when it's done.



-- 
BR,
Hongtao


Re: [PATCH 3/3] testsuite: Require vectors of doubles for pr97428.c

2023-07-07 Thread Richard Biener via Gcc-patches
On Thu, Jul 6, 2023 at 11:37 PM Maciej W. Rozycki  wrote:
>
> The pr97428.c test assumes support for vectors of doubles, but some
> targets only support vectors of floats, causing this test to fail with
> such targets.  Limit this test to targets that support vectors of
> doubles then.

OK.

> gcc/testsuite/
> * gcc.dg/vect/pr97428.c: Limit to `vect_double' targets.
> ---
>  gcc/testsuite/gcc.dg/vect/pr97428.c |1 +
>  1 file changed, 1 insertion(+)
>
> gcc-test-pr97428-vect-double.diff
> Index: gcc/gcc/testsuite/gcc.dg/vect/pr97428.c
> ===
> --- gcc.orig/gcc/testsuite/gcc.dg/vect/pr97428.c
> +++ gcc/gcc/testsuite/gcc.dg/vect/pr97428.c
> @@ -1,4 +1,5 @@
>  /* { dg-do compile } */
> +/* { dg-require-effective-target vect_double } */
>
>  typedef struct { double re, im; } dcmlx_t;
>  typedef struct { double re[4], im[4]; } dcmlx4_t;


Re: [PATCH V2] [x86] Add pre_reload splitter to detect fp min/max pattern.

2023-07-07 Thread Hongtao Liu via Gcc-patches
On Fri, Jul 7, 2023 at 2:02 PM Uros Bizjak via Gcc-patches
 wrote:
>
> On Fri, Jul 7, 2023 at 7:31 AM liuhongt  wrote:
> >
> > > Please split the above pattern into two, one emitting UNSPEC_IEEE_MAX
> > > and the other emitting UNSPEC_IEEE_MIN.
> > Splitted.
> >
> > > The test involves blendv instruction, which is SSE4.1, so it is
> > > pointless to test it without -msse4.1. Please add -msse4.1 instead of
> > > -march=x86_64 and use sse4_runtime target selector, as is the case
> > > with gcc.target/i386/pr90358.c.
> > Changed.
> >
> > > Please also use -msse4.1 instead of -march here. With -mfpmath=sse,
> > > the test is valid also for 32bit targets, you should use -msseregparm
> > > additional options for ia32 (please see gcc.target/i386/pr43546.c
> > > testcase) in the same way as -mregparm to pass SSE arguments in
> > > registers.
> > 32-bit target still failed to do condition elimination for DFmode due to
> > below code in rtx_cost
> >
> >   /* A size N times larger than UNITS_PER_WORD likely needs N times as
> >  many insns, taking N times as long.  */
> >   factor = mode_size > UNITS_PER_WORD ? mode_size / UNITS_PER_WORD : 1;
> >
> > It looks like a separate issue for DFmode operation under 32-bit target.
> >
> > I've enable 32-bit for the testcase, but only scan for minss/maxss
> > currently.
> >
> > Here's updated patch.
> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > Ok for trunk?
> >
> > We have ix86_expand_sse_fp_minmax to detect min/max sematics, but
> > it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false, for
> > the testcase in the PR, there's an extra move from cmp_op0 to if_true,
> > and it failed ix86_expand_sse_fp_minmax.
> >
> > This patch adds pre_reload splitter to detect the min/max pattern.
> >
> > Operands order in MINSS matters for signed zero and NANs, since the
> > instruction always returns second operand when any operand is NAN or
> > both operands are zero.
> >
> > gcc/ChangeLog:
> >
> > PR target/110170
> > * config/i386/i386.md (*ieee_max3_1): New pre_reload
> > splitter to detect fp max pattern.
> > (*ieee_min3_1): Ditto, but for fp min pattern.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * g++.target/i386/pr110170.C: New test.
> > * gcc.target/i386/pr110170.c: New test.
>
> OK with a testcase fix below.
>
> Uros.
>
> > ---
> >  gcc/config/i386/i386.md  | 43 +
> >  gcc/testsuite/g++.target/i386/pr110170.C | 78 
> >  gcc/testsuite/gcc.target/i386/pr110170.c | 21 +++
> >  3 files changed, 142 insertions(+)
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr110170.C
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr110170.c
> >
> > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> > index a82cc353cfd..6f415f899ae 100644
> > --- a/gcc/config/i386/i386.md
> > +++ b/gcc/config/i386/i386.md
> > @@ -23163,6 +23163,49 @@ (define_insn "*ieee_s3"
> > (set_attr "type" "sseadd")
> > (set_attr "mode" "")])
> >
> > +;; Operands order in min/max instruction matters for signed zero and NANs.
> > +(define_insn_and_split "*ieee_max3_1"
> > +  [(set (match_operand:MODEF 0 "register_operand")
> > +   (unspec:MODEF
> > + [(match_operand:MODEF 1 "register_operand")
> > +  (match_operand:MODEF 2 "register_operand")
> > +  (lt:MODEF
> > +(match_operand:MODEF 3 "register_operand")
> > +(match_operand:MODEF 4 "register_operand"))]
> > + UNSPEC_BLENDV))]
> > +  "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH
> > +  && (rtx_equal_p (operands[1], operands[3])
> > +  && rtx_equal_p (operands[2], operands[4]))
> > +  && ix86_pre_reload_split ()"
> > +  "#"
> > +  "&& 1"
> > +  [(set (match_dup 0)
> > +   (unspec:MODEF
> > + [(match_dup 2)
> > +  (match_dup 1)]
> > +UNSPEC_IEEE_MAX))])
> > +
> > +(define_insn_and_split "*ieee_min3_1"
> > +  [(set (match_operand:MODEF 0 "register_operand")
> > +   (unspec:MODEF
> > + [(match_operand:MODEF 1 "register_operand")
> > +  (match_operand:MODEF 2 "register_operand")
> > +  (lt:MODEF
> > +(match_operand:MODEF 3 "register_operand")
> > +(match_operand:MODEF 4 "register_operand"))]
> > + UNSPEC_BLENDV))]
> > +  "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH
> > +  && (rtx_equal_p (operands[1], operands[4])
> > +  && rtx_equal_p (operands[2], operands[3]))
> > +  && ix86_pre_reload_split ()"
> > +  "#"
> > +  "&& 1"
> > +  [(set (match_dup 0)
> > +   (unspec:MODEF
> > + [(match_dup 2)
> > +  (match_dup 1)]
> > +UNSPEC_IEEE_MIN))])
> > +
> >  ;; Make two stack loads independent:
> >  ;;   fld aa  fld aa
> >  ;;   fld %st(0) ->   fld bb
> > diff --git a/gcc/testsuite/g++.target/i386/pr110170.C 
> > b/gcc/testsuite/g++.target/i386/pr110170.C
> > new file mode 100644
> > index 000..5d6842270d0
> > --- 

Re: [PATCH 2/3] testsuite: Require 128-bit vectors for bb-slp-pr95839.c

2023-07-07 Thread Richard Biener via Gcc-patches
On Thu, Jul 6, 2023 at 11:37 PM Maciej W. Rozycki  wrote:
>
> The bb-slp-pr95839.c test assumes quad-single float vector support, but
> some targets only support pairs of floats, causing this test to fail
> with such targets.  Limit this test to targets that support at least
> 128-bit vectors then, and add a complementing test that can be run with
> targets that have support for 64-bit vectors only.  There is no need to
> adjust bb-slp-pr95839-2.c as 128 bits are needed even for the smallest
> vector of doubles, so support is implied by the presence of vectors of
> doubles.

I wonder why you see the testcase FAIL, on x86-64 when doing

typedef float __attribute__((vector_size(32))) v4f32;

v4f32 f(v4f32 a, v4f32 b)
{
  /* Check that we vectorize this CTOR without any loads.  */
  return (v4f32){a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3],
  a[4] + b[4], a[5] + b[5], a[6] + b[6], a[7] + b[7]};
}

I see we vectorize the add and the "store".  We fail to perform
extraction from the incoming vectors (unless you enable AVX),
that's a missed optimization.

So with paired floats I would expect sth similar?  Maybe
x86 is saved by kind-of-presence (but disabled) of V8SFmode vectors.

That said, we should handle this better so can you file an
enhancement bugreport for this?

Thanks,
Richard.

> gcc/testsuite/
> * gcc.dg/vect/bb-slp-pr95839.c: Limit to `vect128' targets.
> * gcc.dg/vect/bb-slp-pr95839-v8.c: New test.
> ---
>  gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-v8.c |   14 ++
>  gcc/testsuite/gcc.dg/vect/bb-slp-pr95839.c|1 +
>  2 files changed, 15 insertions(+)
>
> gcc-test-bb-slp-pr95839-vect128.diff
> Index: gcc/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-v8.c
> ===
> --- /dev/null
> +++ gcc/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-v8.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_float } */
> +/* { dg-require-effective-target vect64 } */
> +/* { dg-additional-options "-w -Wno-psabi" } */
> +
> +typedef float __attribute__((vector_size(8))) v2f32;
> +
> +v2f32 f(v2f32 a, v2f32 b)
> +{
> +  /* Check that we vectorize this CTOR without any loads.  */
> +  return (v2f32){a[0] + b[0], a[1] + b[1]};
> +}
> +
> +/* { dg-final { scan-tree-dump "optimized: basic block" "slp2" } } */
> Index: gcc/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839.c
> ===
> --- gcc.orig/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839.c
> +++ gcc/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839.c
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-require-effective-target vect_float } */
> +/* { dg-require-effective-target vect128 } */
>  /* { dg-additional-options "-w -Wno-psabi" } */
>
>  typedef float __attribute__((vector_size(16))) v4f32;


[PATCH] lto: bypass-asm: Fixed test(U*) used but never defined error.

2023-07-07 Thread Rishi Raj via Gcc-patches
>From 5151cf943987347edbc3707f08f0da8cd9f49f88 Mon Sep 17 00:00:00 2001
From: Rishi Raj 
Date: Fri, 7 Jul 2023 10:15:57 +0530
Subject: [PATCH] lto: Fixed test(U*) used but never defined error.

This patch fixes the error during a bootstrapped build.

Signed-off-by: Rishi Raj 
---
 gcc/lto-object.cc   | 1 +
 gcc/lto/lto-lang.cc | 1 +
 2 files changed, 2 insertions(+)

diff --git a/gcc/lto-object.cc b/gcc/lto-object.cc
index 33eca5a7d81..097c81a686e 100644
--- a/gcc/lto-object.cc
+++ b/gcc/lto-object.cc
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple.h"
 #include "diagnostic-core.h"
 #include "tm.h"
+#include "cgraph.h"
 #include "lto-streamer.h"
 #include "lto-section-names.h"
 #include "simple-object.h"
diff --git a/gcc/lto/lto-lang.cc b/gcc/lto/lto-lang.cc
index cf33bf178c2..35f60325c80 100644
--- a/gcc/lto/lto-lang.cc
+++ b/gcc/lto/lto-lang.cc
@@ -26,6 +26,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "basic-block.h"
 #include "tree.h"
 #include "gimple.h"
+#include "cgraph.h"
 #include "stringpool.h"
 #include "diagnostic-core.h"
 #include "stor-layout.h"
-- 
2.40.1


Re: [PATCH 1/3] testsuite: Add check for vectors of 128 bits being supported

2023-07-07 Thread Richard Biener via Gcc-patches
On Thu, Jul 6, 2023 at 11:36 PM Maciej W. Rozycki  wrote:
>
> Similarly to checks for vectors of 32 bits and 64 bits being supported
> add one for vectors of 128 bits.

OK

> gcc/testsuite/
> * lib/target-supports.exp (check_effective_target_vect128): New
> procedure.
> ---
>  gcc/testsuite/lib/target-supports.exp |6 ++
>  1 file changed, 6 insertions(+)
>
> gcc-test-effective-target-vect128.diff
> Index: gcc/gcc/testsuite/lib/target-supports.exp
> ===
> --- gcc.orig/gcc/testsuite/lib/target-supports.exp
> +++ gcc/gcc/testsuite/lib/target-supports.exp
> @@ -8599,6 +8599,12 @@ proc check_effective_target_vect_variabl
>  return [expr { [lindex [available_vector_sizes] 0] == 0 }]
>  }
>
> +# Return 1 if the target supports vectors of 128 bits.
> +
> +proc check_effective_target_vect128 { } {
> +return [expr { [lsearch -exact [available_vector_sizes] 128] >= 0 }]
> +}
> +
>  # Return 1 if the target supports vectors of 64 bits.
>
>  proc check_effective_target_vect64 { } {


Re: GGC: Remove 'const char *' 'gt_ggc_mx', 'gt_pch_nx' variants (was: [PATCH] support ggc hash_map and hash_set)

2023-07-07 Thread Richard Biener via Gcc-patches
On Thu, Jul 6, 2023 at 8:53 PM Thomas Schwinge  wrote:
>
> Hi!
>
> On 2014-09-01T21:56:28-0400, tsaund...@mozilla.com wrote:
> > [...] this part [...]
>
> ... became commit b086d5308de0d25444243f482f2f3d1dfd3a9a62
> (Subversion r214834), which added GGC support to 'hash_map', 'hash_set',
> and converted to those a number of 'htab' instances.
>
> It doesn't really interfere with my ongoing work, but I have doubts about
> two functions that were added here:
>
> > --- a/gcc/ggc.h
> > +++ b/gcc/ggc.h
>
> > +static inline void
> > +gt_ggc_mx (const char *s)
> > +{
> > +  ggc_test_and_set_mark (const_cast<char *> (s));
> > +}
> > +
> > +static inline void
> > +gt_pch_nx (const char *)
> > +{
> > +}
>
> If (in current sources) I put '__builtin_abort' calls into these
> functions, those don't trigger, so the functions are (currently) unused,
> at least in my configuration.  Moreover, comparing these two to other
> string-related 'gt_ggc_mx' functions in (nowadays) 'gcc/ggc-page.cc', and
> string-related 'gt_pch_nx' functions in (nowadays) 'gcc/stringpool.cc'
> (..., which already did exist back then in 2014), we find that this
> 'gt_ggc_mx' doesn't call 'gt_ggc_m_S', so doesn't get the special string
> handling, and this 'gt_pch_nx' doesn't call 'gt_pch_n_S' and also doesn't
> 'gt_pch_note_object' manually, so I wonder how that ever worked?  So
> maybe these two in fact never were used?  Should we dare to put in the
> attached "GGC: Remove 'const char *' 'gt_ggc_mx', 'gt_pch_nx' variants"?

Are the variants in ggc-page.c/stringpool.cc used?  They don't seem to be
declared anywhere.

I notice that one is for a reference of const char * and one for the value.

But yes, I think we should remove the inlines if they are not needed.

Thanks,
Richard.

>
> Grüße
>  Thomas
>
>
> -
> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
> München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
> Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
> München, HRB 106955


Re: [PATCH] vect: Fix vectorized BIT_FIELD_REF for signed bit-fields [PR110557]

2023-07-07 Thread Richard Biener via Gcc-patches
On Thu, Jul 6, 2023 at 6:18 PM Xi Ruoyao via Gcc-patches
 wrote:
>
> If a bit-field is signed and it's wider than the output type, we must
> ensure the extracted result is sign-extended.  But this was not handled
> correctly.
>
> For example:
>
> int x : 8;
> long y : 55;
> bool z : 1;
>
> The vectorized extraction of y was:
>
> vect__ifc__49.29_110 =
>   MEM  [(struct Item *)vectp_a.27_108];
> vect_patt_38.30_112 =
>   vect__ifc__49.29_110 & { 9223372036854775552, 9223372036854775552 };
> vect_patt_39.31_113 = vect_patt_38.30_112 >> 8;
> vect_patt_40.32_114 =
>   VIEW_CONVERT_EXPR(vect_patt_39.31_113);
>
> This is obviously incorrect.  This patch has implemented it as:
>
> vect__ifc__25.16_62 =
>   MEM  [(struct Item *)vectp_a.14_60];
> vect_patt_31.17_63 =
>   VIEW_CONVERT_EXPR(vect__ifc__25.16_62);
> vect_patt_32.18_64 = vect_patt_31.17_63 << 1;
> vect_patt_33.19_65 = vect_patt_32.18_64 >> 9;
>
> gcc/ChangeLog:
>
> PR tree-optimization/110557
> * tree-vect-patterns.cc (vect_recog_bitfield_ref_pattern):
> Ensure the output sign-extended if necessary.
>
> gcc/testsuite/ChangeLog:
>
> PR tree-optimization/110557
> * g++.dg/vect/pr110557.cc: New test.
> ---
>
> Bootstrapped and regtested on x86_64-linux-gnu.  Ok for trunk and gcc-13
> branch?
>
>  gcc/testsuite/g++.dg/vect/pr110557.cc | 37 +
>  gcc/tree-vect-patterns.cc | 58 ---
>  2 files changed, 81 insertions(+), 14 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/vect/pr110557.cc
>
> diff --git a/gcc/testsuite/g++.dg/vect/pr110557.cc 
> b/gcc/testsuite/g++.dg/vect/pr110557.cc
> new file mode 100644
> index 000..e1fbe1caac4
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/vect/pr110557.cc
> @@ -0,0 +1,37 @@
> +// { dg-additional-options "-mavx" { target { avx_runtime } } }
> +
> +static inline long
> +min (long a, long b)
> +{
> +  return a < b ? a : b;
> +}
> +
> +struct Item
> +{
> +  int x : 8;
> +  long y : 55;
> +  bool z : 1;
> +};
> +
> +__attribute__ ((noipa)) long
> +test (Item *a, int cnt)
> +{
> +  long size = 0;
> +  for (int i = 0; i < cnt; i++)
> +size = min ((long)a[i].y, size);
> +  return size;
> +}
> +
> +int
> +main ()
> +{
> +  struct Item items[] = {
> +{ 1, -1 },
> +{ 2, -2 },
> +{ 3, -3 },
> +{ 4, -4 },
> +  };
> +
> +  if (test (items, 4) != -4)
> +__builtin_trap ();
> +}
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index 1bc36b043a0..20412c27ead 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -2566,7 +2566,7 @@ vect_recog_widen_sum_pattern (vec_info *vinfo,
> Widening with mask first, shift later:
> container = (type_out) container;
> masked = container & (((1 << bitsize) - 1) << bitpos);
> -   result = patt2 >> masked;
> +   result = masked >> bitpos;
>
> Widening with shift first, mask last:
> container = (type_out) container;
> @@ -2578,6 +2578,15 @@ vect_recog_widen_sum_pattern (vec_info *vinfo,
> result = masked >> bitpos;
> result = (type_out) result;
>
> +   If the bitfield is signed and it's wider than type_out, we need to
> +   keep the result sign-extended:
> +   container = (type) container;
> +   masked = container << (prec - bitsize - bitpos);
> +   result = (type_out) (masked >> (prec - bitsize));
> +
> +   Here type is the signed variant of the wider of type_out and the type
> +   of container.
> +
> The shifting is always optional depending on whether bitpos != 0.
>
>  */
> @@ -2636,14 +2645,22 @@ vect_recog_bitfield_ref_pattern (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>if (BYTES_BIG_ENDIAN)
>  shift_n = prec - shift_n - mask_width;
>
> +  bool sign_ext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref)) &&
> +  TYPE_PRECISION (ret_type) > mask_width);
> +  bool widening = ((TYPE_PRECISION (TREE_TYPE (container)) <
> +   TYPE_PRECISION (ret_type))
> +  && !useless_type_conversion_p (TREE_TYPE (container),
> + ret_type));

the !useless_type_conversion_p check isn't necessary; when the TYPE_PRECISIONs
aren't equal, the conversion is never useless.

I'll also note that ret_type == TREE_TYPE (bf_ref).

Can you rename 'widening' to 'load_widen' and 'sign_ext' to 'ref_sext'?  As
they are named, it suggests they apply to the same thing, so I originally
thought sign_ext should be widening && !TYPE_UNSIGNED.

Otherwise looks reasonable.

Thanks,
Richard.

> +
>/* We move the conversion earlier if the loaded type is smaller than the
>   return type to enable the use of widening loads.  */
> -  if (TYPE_PRECISION (TREE_TYPE (container)) < TYPE_PRECISION (ret_type)
> -  && !useless_type_conversion_p (TREE_TYPE (container), ret_type))
> +  if (sign_ext || widening)
>  {
> -  pattern_stmt
> -   = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),

Re: [PATCH V2] [x86] Add pre_reload splitter to detect fp min/max pattern.

2023-07-07 Thread Uros Bizjak via Gcc-patches
On Fri, Jul 7, 2023 at 7:31 AM liuhongt  wrote:
>
> > Please split the above pattern into two, one emitting UNSPEC_IEEE_MAX
> > and the other emitting UNSPEC_IEEE_MIN.
> Split.
>
> > The test involves blendv instruction, which is SSE4.1, so it is
> > pointless to test it without -msse4.1. Please add -msse4.1 instead of
> > -march=x86_64 and use sse4_runtime target selector, as is the case
> > with gcc.target/i386/pr90358.c.
> Changed.
>
> > Please also use -msse4.1 instead of -march here. With -mfpmath=sse,
> > the test is valid also for 32bit targets, you should use -msseregparm
> > additional options for ia32 (please see gcc.target/i386/pr43546.c
> > testcase) in the same way as -mregparm to pass SSE arguments in
> > registers.
> 32-bit target still failed to do condition elimination for DFmode due to
> below code in rtx_cost
>
>   /* A size N times larger than UNITS_PER_WORD likely needs N times as
>  many insns, taking N times as long.  */
>   factor = mode_size > UNITS_PER_WORD ? mode_size / UNITS_PER_WORD : 1;
>
> It looks like a separate issue for DFmode operation under 32-bit target.
>
> I've enabled 32-bit for the testcase, but only scan for minss/maxss
> currently.
>
> Here's updated patch.
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> We have ix86_expand_sse_fp_minmax to detect min/max semantics, but
> it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false, for
> the testcase in the PR, there's an extra move from cmp_op0 to if_true,
> and it failed ix86_expand_sse_fp_minmax.
>
> This patch adds pre_reload splitter to detect the min/max pattern.
>
> Operands order in MINSS matters for signed zero and NANs, since the
> instruction always returns second operand when any operand is NAN or
> both operands are zero.
>
> gcc/ChangeLog:
>
> PR target/110170
> * config/i386/i386.md (*ieee_max<mode>3_1): New pre_reload
> splitter to detect fp max pattern.
> (*ieee_min<mode>3_1): Ditto, but for fp min pattern.
>
> gcc/testsuite/ChangeLog:
>
> * g++.target/i386/pr110170.C: New test.
> * gcc.target/i386/pr110170.c: New test.

OK with a testcase fix below.

Uros.

> ---
>  gcc/config/i386/i386.md  | 43 +
>  gcc/testsuite/g++.target/i386/pr110170.C | 78 
>  gcc/testsuite/gcc.target/i386/pr110170.c | 21 +++
>  3 files changed, 142 insertions(+)
>  create mode 100644 gcc/testsuite/g++.target/i386/pr110170.C
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr110170.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index a82cc353cfd..6f415f899ae 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -23163,6 +23163,49 @@ (define_insn "*ieee_s3"
> (set_attr "type" "sseadd")
> (set_attr "mode" "")])
>
> +;; Operands order in min/max instruction matters for signed zero and NANs.
> +(define_insn_and_split "*ieee_max<mode>3_1"
> +  [(set (match_operand:MODEF 0 "register_operand")
> +   (unspec:MODEF
> + [(match_operand:MODEF 1 "register_operand")
> +  (match_operand:MODEF 2 "register_operand")
> +  (lt:MODEF
> +(match_operand:MODEF 3 "register_operand")
> +(match_operand:MODEF 4 "register_operand"))]
> + UNSPEC_BLENDV))]
> +  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
> +  && (rtx_equal_p (operands[1], operands[3])
> +  && rtx_equal_p (operands[2], operands[4]))
> +  && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 0)
> +   (unspec:MODEF
> + [(match_dup 2)
> +  (match_dup 1)]
> +UNSPEC_IEEE_MAX))])
> +
> +(define_insn_and_split "*ieee_min<mode>3_1"
> +  [(set (match_operand:MODEF 0 "register_operand")
> +   (unspec:MODEF
> + [(match_operand:MODEF 1 "register_operand")
> +  (match_operand:MODEF 2 "register_operand")
> +  (lt:MODEF
> +(match_operand:MODEF 3 "register_operand")
> +(match_operand:MODEF 4 "register_operand"))]
> + UNSPEC_BLENDV))]
> +  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
> +  && (rtx_equal_p (operands[1], operands[4])
> +  && rtx_equal_p (operands[2], operands[3]))
> +  && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 0)
> +   (unspec:MODEF
> + [(match_dup 2)
> +  (match_dup 1)]
> +UNSPEC_IEEE_MIN))])
> +
>  ;; Make two stack loads independent:
>  ;;   fld aa  fld aa
>  ;;   fld %st(0) ->   fld bb
> diff --git a/gcc/testsuite/g++.target/i386/pr110170.C 
> b/gcc/testsuite/g++.target/i386/pr110170.C
> new file mode 100644
> index 000..5d6842270d0
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr110170.C
> @@ -0,0 +1,78 @@
> +/* { dg-do run } */
> +/* { dg-options " -O2 -msse4.1 -mfpmath=sse -std=gnu++20" } */

Please either change the first line to:

{ dg-do run { target sse4_runtime } }

or add

{ dg-require-effective-target sse4_runtime }

to the runtime test.

>