[Patch, v2] gcn/mkoffload.cc: Use #embed for including the generated ELF file

2024-06-21 Thread Tobias Burnus

[I messed up copying from the build system, picking up an old version.
Changes to v1 (bottom of the diff): fopen is no longer required.]

Tobias Burnus wrote:

mkoffload's generated .c file looks much nicer with '#embed'.

This patch depends on Jakub's #embed patch at
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/655012.html

It might be a tiny bit faster than currently (or not); however,
once #embed has a large-file mode, it should also speed up
the offloading compilation quite a bit.

OK for mainline, once '#embed' support is in?

Tobias
gcn/mkoffload.cc: Use #embed for including the generated ELF file

gcc/ChangeLog:

	* config/gcn/mkoffload.cc (read_file): Remove.
	(process_obj): Generate C file that uses #embed.
	(main): Update call to it; remove no longer needed file I/O.

 gcc/config/gcn/mkoffload.cc | 72 -
 1 file changed, 12 insertions(+), 60 deletions(-)

diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index 810298a799b..0c840318b2d 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -182,44 +182,6 @@ xputenv (const char *string)
   putenv (CONST_CAST (char *, string));
 }
 
-/* Read the whole input file.  It will be NUL terminated (but
-   remember, there could be a NUL in the file itself.  */
-
-static const char *
-read_file (FILE *stream, size_t *plen)
-{
-  size_t alloc = 16384;
-  size_t base = 0;
-  char *buffer;
-
-  if (!fseek (stream, 0, SEEK_END))
-{
-  /* Get the file size.  */
-  long s = ftell (stream);
-  if (s >= 0)
-	alloc = s + 100;
-  fseek (stream, 0, SEEK_SET);
-}
-  buffer = XNEWVEC (char, alloc);
-
-  for (;;)
-{
-  size_t n = fread (buffer + base, 1, alloc - base - 1, stream);
-
-  if (!n)
-	break;
-  base += n;
-  if (base + 1 == alloc)
-	{
-	  alloc *= 2;
-	  buffer = XRESIZEVEC (char, buffer, alloc);
-	}
-}
-  buffer[base] = 0;
-  *plen = base;
-  return buffer;
-}
-
 /* Parse STR, saving found tokens into PVALUES and return their number.
Tokens are assumed to be delimited by ':'.  */
 
@@ -725,31 +687,27 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
 /* Embed an object file into a C source file.  */
 
 static void
-process_obj (FILE *in, FILE *cfile, uint32_t omp_requires)
+process_obj (const char *fname_in, FILE *cfile, uint32_t omp_requires)
 {
-  size_t len = 0;
-  const char *input = read_file (in, &len);
-
   /* Dump out an array containing the binary.
  FIXME: do this with objcopy.  */
-  fprintf (cfile, "static unsigned char gcn_code[] = {");
-  for (size_t i = 0; i < len; i += 17)
-{
-  fprintf (cfile, "\n\t");
-  for (size_t j = i; j < i + 17 && j < len; j++)
-	fprintf (cfile, "%3u,", (unsigned char) input[j]);
-}
-  fprintf (cfile, "\n};\n\n");
+  fprintf (cfile,
+	   "static unsigned char gcn_code[] = {\n"
+	   "#if defined(__STDC_EMBED_FOUND__) && __has_embed (\"%s\") == __STDC_EMBED_FOUND__\n"
+	   "#embed \"%s\"\n"
+	   "#else\n"
+	   "#error \"#embed '%s' failed\"\n"
+	   "#endif\n"
+	   "};\n\n", fname_in, fname_in, fname_in);
 
   fprintf (cfile,
 	   "static const struct gcn_image {\n"
 	   "  size_t size;\n"
 	   "  void *image;\n"
 	   "} gcn_image = {\n"
-	   "  %zu,\n"
+	   "  sizeof(gcn_code),\n"
 	   "  gcn_code\n"
-	   "};\n\n",
-	   len);
+	   "};\n\n");
 
   fprintf (cfile,
 	   "static const struct gcn_data {\n"
@@ -1312,13 +1270,7 @@ main (int argc, char **argv)
   fork_execute (ld_argv[0], CONST_CAST (char **, ld_argv), true, ".ld_args");
   obstack_free (&ld_argv_obstack, NULL);
 
-  in = fopen (gcn_o_name, "r");
-  if (!in)
-	fatal_error (input_location, "cannot open intermediate gcn obj file");
-
-  process_obj (in, cfile, omp_requires);
-
-  fclose (in);
+  process_obj (gcn_o_name, cfile, omp_requires);
 
   xputenv (concat ("GCC_EXEC_PREFIX=", execpath, NULL));
   xputenv (concat ("COMPILER_PATH=", cpath, NULL));


[Patch] gcn/mkoffload.cc: Use #embed for including the generated ELF file

2024-06-21 Thread Tobias Burnus

mkoffload's generated .c file looks much nicer with '#embed'.

This patch depends on Jakub's #embed patch at
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/655012.html

It might be a tiny bit faster than currently (or not); however,
once #embed has a large-file mode, it should also speed up
the offloading compilation quite a bit.

OK for mainline, once '#embed' support is in?

Tobias
gcn/mkoffload.cc: Use #embed for including the generated ELF file

gcc/ChangeLog:

	* config/gcn/mkoffload.cc (read_file): Remove.
	(process_obj): Generate C file that uses #embed.
	(main): Update call to it; remove no longer needed file I/O.

 gcc/config/gcn/mkoffload.cc | 66 +
 1 file changed, 12 insertions(+), 54 deletions(-)

diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index 810298a799b..0ccb874398a 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -182,44 +182,6 @@ xputenv (const char *string)
   putenv (CONST_CAST (char *, string));
 }
 
-/* Read the whole input file.  It will be NUL terminated (but
-   remember, there could be a NUL in the file itself.  */
-
-static const char *
-read_file (FILE *stream, size_t *plen)
-{
-  size_t alloc = 16384;
-  size_t base = 0;
-  char *buffer;
-
-  if (!fseek (stream, 0, SEEK_END))
-{
-  /* Get the file size.  */
-  long s = ftell (stream);
-  if (s >= 0)
-	alloc = s + 100;
-  fseek (stream, 0, SEEK_SET);
-}
-  buffer = XNEWVEC (char, alloc);
-
-  for (;;)
-{
-  size_t n = fread (buffer + base, 1, alloc - base - 1, stream);
-
-  if (!n)
-	break;
-  base += n;
-  if (base + 1 == alloc)
-	{
-	  alloc *= 2;
-	  buffer = XRESIZEVEC (char, buffer, alloc);
-	}
-}
-  buffer[base] = 0;
-  *plen = base;
-  return buffer;
-}
-
 /* Parse STR, saving found tokens into PVALUES and return their number.
Tokens are assumed to be delimited by ':'.  */
 
@@ -725,31 +687,27 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
 /* Embed an object file into a C source file.  */
 
 static void
-process_obj (FILE *in, FILE *cfile, uint32_t omp_requires)
+process_obj (const char *fname_in, FILE *cfile, uint32_t omp_requires)
 {
-  size_t len = 0;
-  const char *input = read_file (in, &len);
-
   /* Dump out an array containing the binary.
  FIXME: do this with objcopy.  */
-  fprintf (cfile, "static unsigned char gcn_code[] = {");
-  for (size_t i = 0; i < len; i += 17)
-{
-  fprintf (cfile, "\n\t");
-  for (size_t j = i; j < i + 17 && j < len; j++)
-	fprintf (cfile, "%3u,", (unsigned char) input[j]);
-}
-  fprintf (cfile, "\n};\n\n");
+  fprintf (cfile,
+	   "static unsigned char gcn_code[] = {\n"
+	   "#if defined(__STDC_EMBED_FOUND__) && __has_embed (\"%s\") == __STDC_EMBED_FOUND__\n"
+	   "#embed \"%s\"\n"
+	   "#else\n"
+	   "#error \"#embed '%s' failed\"\n"
+	   "#endif\n"
+	   "};\n\n", fname_in, fname_in, fname_in);
 
   fprintf (cfile,
 	   "static const struct gcn_image {\n"
 	   "  size_t size;\n"
 	   "  void *image;\n"
 	   "} gcn_image = {\n"
-	   "  %zu,\n"
+	   "  sizeof(gcn_code),\n"
 	   "  gcn_code\n"
-	   "};\n\n",
-	   len);
+	   "};\n\n");
 
   fprintf (cfile,
 	   "static const struct gcn_data {\n"
@@ -1316,7 +1274,7 @@ main (int argc, char **argv)
   if (!in)
 	fatal_error (input_location, "cannot open intermediate gcn obj file");
 
-  process_obj (in, cfile, omp_requires);
+  process_obj (gcn_o_name, cfile, omp_requires);
 
   fclose (in);
 


[Patch] OpenMP/Fortran: Fix handling of 'declare target' with 'link' clause [PR115559]

2024-06-21 Thread Tobias Burnus

Hi all,

it turned out that 'declare target' with 'link' clause was broken in multiple 
ways.

The main fix is the attached patch, namely pushing the variables to
the offload-vars list already in the FE.

When implementing it, I noticed:
* C has a similar issue when using nested functions, which is
  a GNU extension →https://gcc.gnu.org/115574

* When doing partial mapping of arrays (which is one of the reasons for 'link'),
  offsets are mishandled in Fortran (not tested in C; see FIXME in the patch).
  There: arr2(10) should print 10 but with map(arr2(10:)) it prints 19.
  (I will file a PR about this).

* It might happen that linked variables do not get linked. I have not 
investigated
  why, but 'arr2' gives link errors – while 'arr' works.
  See FIXME in the patch. (I will file a PR about this)

* For COMMON blocks, map(/common/) is rejected,https://gcc.gnu.org/PR115577

* When then mapping map(a,b,c) which is identical for 'common /mycom/ a,b,c',
  it fails to link the device side as the 'mycom_' symbol cannot be found on the
  device side.  (I will file a PR about this)

As COMMON has issues, an alternative would be to defer the trans-common.cc
changes to a later patch.

Comments, questions, concerns?

Tobias

PS: Tested with nvptx offloading with a page-migration supporting system with
nvptx and GCN offloading configured and no new fails observed.
OpenMP/Fortran: Fix handling of 'declare target' with 'link' clause [PR115559]

Contrary to a normal 'declare target', the 'declare target link' attribute
also needs to set node->offloadable and push the offload_vars in the front end.

Linked variables require that the data is mapped. For module variables, this
can happen anywhere. For variables in an external subprogram or the main
program, this can only happen either in that program itself or in an
internal subprogram. - Whether a variable is just normally mapped or linked then
becomes relevant if a device routine exists that can access that variable,
i.e. an internal procedure has then to be marked as declare target.

	PR fortran/115559

gcc/fortran/ChangeLog:

	* trans-common.cc (build_common_decl): Add 'omp declare target' and
	'omp declare target link' variables to offload_vars.
	* trans-decl.cc (add_attributes_to_decl): Likewise; update args and
	call decl_attributes.
	(get_proc_pointer_decl, gfc_get_extern_function_decl,
	build_function_decl): Update calls.
	(gfc_get_symbol_decl): Likewise; move after 'DECL_STATIC (t)=1'
	to avoid errors with symtab_node::get_create.

libgomp/ChangeLog:

	* testsuite/libgomp.fortran/declare-target-link.f90: New test.

 gcc/fortran/trans-common.cc|  21 
 gcc/fortran/trans-decl.cc  |  81 +-
 .../libgomp.fortran/declare-target-link.f90| 119 +
 3 files changed, 195 insertions(+), 26 deletions(-)

diff --git a/gcc/fortran/trans-common.cc b/gcc/fortran/trans-common.cc
index 5f44e7bd663..e714342c3c0 100644
--- a/gcc/fortran/trans-common.cc
+++ b/gcc/fortran/trans-common.cc
@@ -98,6 +98,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "coretypes.h"
 #include "tm.h"
 #include "tree.h"
+#include "cgraph.h"
+#include "context.h"
+#include "omp-offload.h"
 #include "gfortran.h"
 #include "trans.h"
 #include "stringpool.h"
@@ -497,6 +500,24 @@ build_common_decl (gfc_common_head *com, tree union_type, bool is_init)
 	  = tree_cons (get_identifier ("omp declare target"),
 		   omp_clauses, DECL_ATTRIBUTES (decl));
 
+  if (com->omp_declare_target_link || com->omp_declare_target)
+	{
+	  /* Add to offload_vars; get_create does so for omp_declare_target,
+	 omp_declare_target_link requires manual work.  */
+	  gcc_assert (symtab_node::get (decl) == 0);
+	  symtab_node *node = symtab_node::get_create (decl);
+	  if (node != NULL && com->omp_declare_target_link)
+	{
+	  node->offloadable = 1;
+	  if (ENABLE_OFFLOADING)
+		{
+		  g->have_offload = true;
+		  if (is_a <varpool_node *> (node))
+		vec_safe_push (offload_vars, decl);
+		}
+	}
+	}
+
   /* Place the back end declaration for this common block in
  GLOBAL_BINDING_LEVEL.  */
   gfc_map_of_all_commons[identifier] = pushdecl_top_level (decl);
diff --git a/gcc/fortran/trans-decl.cc b/gcc/fortran/trans-decl.cc
index 8d4f06a4e1d..4067dd6ed77 100644
--- a/gcc/fortran/trans-decl.cc
+++ b/gcc/fortran/trans-decl.cc
@@ -46,7 +46,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "trans-stmt.h"
 #include "gomp-constants.h"
 #include "gimplify.h"
+#include "context.h"
 #include "omp-general.h"
+#include "omp-offload.h"
 #include "attr-fnspec.h"
 #include "tree-iterator.h"
 #include "dependency.h"
@@ -1470,19 +1472,18 @@ gfc_add_assign_aux_vars (gfc_symbol * sym)
 }
 
 
-static tree
-add_attributes_to_decl (symbol_attribute sym_attr, tree list)
+static void
+add_attributes_to_decl (tree *decl_p, const gfc_symbol *sym)
 {
   unsigned id;
-  tree attr;
+  tree list = 

Re: [PATCH v5 2/6] libgomp, openmp: Add ompx_gnu_pinned_mem_alloc

2024-06-12 Thread Tobias Burnus

Andrew Stubbs wrote:

Compared to the previous v4 (1/5) posting of this patch:
- The enumeration of the ompx allocators have been moved (again) to 200
   (as 100 is already in use by another toolchain vendor and this seems
   like a possible source of confusion).
- The "ompx" has also been changed to "ompx_gnu" to highlight that these
   are specifically GNU extensions.
- The failure mode of the testcases had been modified, including adding
   an abort in CHECK_SIZE and skipping the test on unsupported platforms.
- The OMP_ALLOCATE environment variable now supports the new allocator.
- The Fortran frontend allows use of the new allocator in "allocator"
   clauses.

---

This creates a new predefined allocator as a shortcut for using pinned
memory with OpenMP.  This is not in the OpenMP standard so it uses the "ompx"
namespace and an independent enum baseline of 200 (selected to not clash with
other known implementations).

The allocator is equivalent to using a custom allocator with the pinned
trait and the null fallback trait.  One motivation for having this feature is
for use by the (planned) -foffload-memory=pinned feature.


The patch LGTM.

Thanks!

Tobias

gcc/fortran/ChangeLog:

* openmp.cc (is_predefined_allocator): Update valid ranges to
  incorporate ompx_gnu_pinned_mem_alloc.

libgomp/ChangeLog:

* allocator.c (ompx_gnu_min_predefined_alloc): New.
(ompx_gnu_max_predefined_alloc): New.
(predefined_alloc_mapping): Rename to ...
(predefined_omp_alloc_mapping): ... this.
(predefined_ompx_gnu_alloc_mapping): New.
(_Static_assert): Adjust for the new name, and add a new assert for the
new table.
(predefined_allocator_p): New.
(predefined_alloc_mapping): New.
(omp_aligned_alloc): Support ompx_gnu_pinned_mem_alloc.
Use predefined_allocator_p and predefined_alloc_mapping.
(omp_free): Likewise.
(omp_aligned_calloc): Likewise.
(omp_realloc): Likewise.
* env.c (parse_allocator): Add ompx_gnu_pinned_mem_alloc.
* libgomp.texi: Document ompx_gnu_pinned_mem_alloc.
* omp.h.in (omp_allocator_handle_t): Add ompx_gnu_pinned_mem_alloc.
* omp_lib.f90.in: Add ompx_gnu_pinned_mem_alloc.
* omp_lib.h.in: Add ompx_gnu_pinned_mem_alloc.
* testsuite/libgomp.c/alloc-pinned-5.c: New test.
* testsuite/libgomp.c/alloc-pinned-6.c: New test.
* testsuite/libgomp.fortran/alloc-pinned-1.f90: New test.

gcc/testsuite/ChangeLog:

* gfortran.dg/gomp/allocate-pinned-1.f90: New test.

Co-Authored-By: Thomas Schwinge
---
  gcc/fortran/openmp.cc |  11 +-
  .../gfortran.dg/gomp/allocate-pinned-1.f90|  16 +++
  libgomp/allocator.c   | 115 +-
  libgomp/env.c |   1 +
  libgomp/libgomp.texi  |   7 +-
  libgomp/omp.h.in  |   1 +
  libgomp/omp_lib.f90.in|   2 +
  libgomp/omp_lib.h.in  |   2 +
  libgomp/testsuite/libgomp.c/alloc-pinned-5.c  | 100 +++
  libgomp/testsuite/libgomp.c/alloc-pinned-6.c  | 102 
  .../libgomp.fortran/alloc-pinned-1.f90|  16 +++
  11 files changed, 336 insertions(+), 37 deletions(-)
  create mode 100644 gcc/testsuite/gfortran.dg/gomp/allocate-pinned-1.f90
  create mode 100644 libgomp/testsuite/libgomp.c/alloc-pinned-5.c
  create mode 100644 libgomp/testsuite/libgomp.c/alloc-pinned-6.c
  create mode 100644 libgomp/testsuite/libgomp.fortran/alloc-pinned-1.f90


Re: [PATCH v5 1/6] libgomp: change alloc-pinned tests failure mode

2024-06-12 Thread Tobias Burnus

Andrew Stubbs wrote:

The feature doesn't work on non-Linux hosts, at present, so skip the tests
entirely.

On Linux systems that have insufficient lockable memory configured we still
need to fail or else the feature won't be getting tested when we think it is,
but now there's a message to explain why.

libgomp/ChangeLog:

* testsuite/libgomp.c/alloc-pinned-1.c: Change dg-xfail-run-if to
dg-skip-if.
Correct spelling mistake.
Abort on insufficient lockable memory.
Use #error on non-linux hosts.
* testsuite/libgomp.c/alloc-pinned-2.c: Likewise.


LGTM. Thanks!

Tobias



Re: [Patch, PR Fortran/90072] Polymorphic Dispatch to Polymorphic Return Type Memory Leak

2024-06-08 Thread Tobias Burnus

Andre Vehreschild wrote:

PS That's good news about the funding. Maybe we will get to see "built in"
coarrays soon?

You hopefully will see Nikolas work on the shared memory coarray support, if
that is what you mean by "built in" coarrays. I will be working on the
distributed memory coarray support esp. fixing the module issues and some other
team related things.


Cool! (Both of it.)

I assume "distributed memory coarray support" is still based on Open
Coarrays?

* * *

I am asking because there is coarray API being defined: Parallel Runtime
Interface for Fortran (PRIF), https://go.lbl.gov/prif

with an implementation called Caffeine – CoArray Fortran Framework of
Efficient Interfaces to Network Environments,
https://crd.lbl.gov/caffeine which uses GASNet or POSIX processes.

Well, among the implementers is (unsurprisingly?) Damian – and the
idea seems to be that LLVM's FLANG will use the API.

Tobias

PS: I think it might be useful in the long run to support both
PRIF/Caffeine and OpenCoarrays.

I have attached my hello-world patch for -fcoarray=prif that I wrote
after ISC-HPC; it only handles this_image() / num_images() + init/stop.
I got confirmation by the PRIF developers that the next revision will
permit calling __prif_MOD_prif_init multiple times such that one can use
it in the constructor for static coarrays, which won't work otherwise.
gcc/ChangeLog:

	* flag-types.h (enum gfc_fcoarray):

gcc/fortran/ChangeLog:

	* invoke.texi:
	* lang.opt:
	* trans-decl.cc (gfc_build_builtin_function_decls):
	(create_main_function):
	* trans-intrinsic.cc (trans_this_image):
	(trans_num_images):
	* trans.h (GTY):

 gcc/flag-types.h   |  3 ++-
 gcc/fortran/invoke.texi|  7 +-
 gcc/fortran/lang.opt   |  5 +++-
 gcc/fortran/trans-decl.cc  | 56 --
 gcc/fortran/trans-intrinsic.cc | 42 +++
 gcc/fortran/trans.h|  5 
 6 files changed, 108 insertions(+), 10 deletions(-)

diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index 5a2b461fa75..babd747c01d 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -427,7 +427,8 @@ enum gfc_fcoarray
 {
   GFC_FCOARRAY_NONE = 0,
   GFC_FCOARRAY_SINGLE,
-  GFC_FCOARRAY_LIB
+  GFC_FCOARRAY_LIB,
+  GFC_FCOARRAY_PRIF
 };
 
 
diff --git a/gcc/fortran/invoke.texi b/gcc/fortran/invoke.texi
index 40e8e4a7cdd..331a40d31db 100644
--- a/gcc/fortran/invoke.texi
+++ b/gcc/fortran/invoke.texi
@@ -1753,7 +1753,12 @@ Single-image mode, i.e. @code{num_images()} is always one.
 
 @item @samp{lib}
 Library-based coarray parallelization; a suitable GNU Fortran coarray
-library needs to be linked.
+library needs to be linked such as @url{http://opencoarrays.org}.
+
+@item @samp{prif}
+Using the Parallel Runtime Interface for Fortran (PRIF),
+@url{https://go.lbl.gov/@/prif}; for instance, via Caffeine,
+@url{https://go.lbl.gov/@/caffeine}.
 @end table
 
 
diff --git a/gcc/fortran/lang.opt b/gcc/fortran/lang.opt
index 5efd4a0129a..9ba957d5571 100644
--- a/gcc/fortran/lang.opt
+++ b/gcc/fortran/lang.opt
@@ -786,7 +786,7 @@ Copy array sections into a contiguous block on procedure entry.
 
 fcoarray=
 Fortran RejectNegative Joined Enum(gfc_fcoarray) Var(flag_coarray) Init(GFC_FCOARRAY_NONE)
--fcoarray=	Specify which coarray parallelization should be used.
+-fcoarray=	Specify which coarray parallelization should be used.
 
 Enum
 Name(gfc_fcoarray) Type(enum gfc_fcoarray) UnknownError(Unrecognized option: %qs)
@@ -800,6 +800,9 @@ Enum(gfc_fcoarray) String(single) Value(GFC_FCOARRAY_SINGLE)
 EnumValue
 Enum(gfc_fcoarray) String(lib) Value(GFC_FCOARRAY_LIB)
 
+EnumValue
+Enum(gfc_fcoarray) String(prif) Value(GFC_FCOARRAY_PRIF)
+
 fcheck=
 Fortran RejectNegative JoinedOrMissing
 -fcheck=[...]	Specify which runtime checks are to be performed.
diff --git a/gcc/fortran/trans-decl.cc b/gcc/fortran/trans-decl.cc
index dca7779528b..d1c0e2ee997 100644
--- a/gcc/fortran/trans-decl.cc
+++ b/gcc/fortran/trans-decl.cc
@@ -170,6 +170,10 @@ tree gfor_fndecl_co_sum;
 tree gfor_fndecl_caf_is_present;
 tree gfor_fndecl_caf_random_init;
 
+tree gfor_fndecl_prif_init;
+tree gfor_fndecl_prif_stop;
+tree gfor_fndecl_prif_this_image_no_coarray;
+tree gfor_fndecl_prif_num_images;
 
 /* Math functions.  Many other math functions are handled in
trans-intrinsic.cc.  */
@@ -4147,6 +4151,31 @@ gfc_build_builtin_function_decls (void)
 	get_identifier (PREFIX("caf_random_init")),
 	void_type_node, 2, logical_type_node, logical_type_node);
 }
+  else if (flag_coarray == GFC_FCOARRAY_PRIF)
+{
+  tree pint_type = build_pointer_type (integer_type_node);
+  tree pbool_type = build_pointer_type (boolean_type_node);
+  tree pintmax_type_node = get_typenode_from_name (INTMAX_TYPE);
+  pintmax_type_node = build_pointer_type (pintmax_type_node);
+
+  gfor_fndecl_prif_init = gfc_build_library_function_decl_with_spec (
+	get_identifier ("__prif_MOD_prif_init"), ". W ",
+	void_type_node, 1, 

Re: [wwwdocs] gcc-15/changes.html + projects/gomp: update for new OpenMP features

2024-06-08 Thread Tobias Burnus

Hi Gerald,

Gerald Pfeifer wrote:

Looks like a janitorial task to fix the absolute links, possibly
excluding those with /git, /onlinedocs, /wiki – or assuming that the
main page is GCC.gnu.org, relying on the redirects.

It's on my list. A first quick check indicates there isn't much to do,
though. :-)


You could consider

htdocs/search.html:

to avoid a redirect (but it is not a broken link);
otherwise, I concur that it seems to be (mostly) fine :-)

* * *


+  loop-transformation constructs are now supported.
I'm thinking "loop transformation" in English? Or is this a specific term
from the standard?

Loop transformation happens at the end. But e.g "(#pragma omp) unroll
full" is a directive and, e.g.
...
is a construct (= directive + structured block (if any) + end directive
(if any)).

I believe there was a misunderstanding and I wasn't clear enough: I was
wondering whether instead of "loop-transformation" the patch should have
"loop transformation".

In your response you use the version without dash, so I guess we agree?
:-)


(Pedantically it's a hyphen (-) and not a(n en/em) dash (–/—), i.e. '-' 
not '--' or '---' in TeX.)


No, we don't. – There is a difference whether the two words are used 
alone or as modifier to a noun, like the "this is well defined" vs. "a 
well-defined project".


Thus, while "loop transformation happens" is without hyphen (as we both 
agree),* for "loop(-| )transformation constructs" the (non-)usage of 
hyphens is not well defined; grouping wise, those are clearly '((loop 
transformation) constructs)' and not '(loop (transformation constructs))'.


I believe both variants are perfectly fine.

BTW: In the OpenMP pre-6.0 draft (TR12), the verb 'transform' is now 
used as noun not with suffix '-ation' but with the suffix '-ing' (also 
referred to as gerund) such that a section title now uses 
"Loop-Transforming Constructs"; I think for '(word) plus (-ing word)' – 
used as modifier –, a hyphen is a tad more common than for '(word) plus 
'(word with -ation suffix)'.


Tobias

* The Oxford Guide to Style points out some words that do get 
hyphenated: clear-cut, drip-proof, take-off, part-time, … – or to refer 
to the abstract meaning rather than literal: bull's-eye, crow's-feet, … 
— Formerly, present participle plus noun got hyphenated when the compound 
was acted on: walking-stick, walking-frame. Likewise, it was formerly 
normal in British English to hyphenate a single adjectival noun and the 
noun it modified: note-cue, title-page, volume-number (less common now, 
but can linger in some combination). And until recently: small 
scale-factory (vs. small-scale factory), white water-lily (vs. 
white-water lily).


gcc-wwwdocs branch master updated. 4260d675af42b9c97e29818ab3b3154d27103d49

2024-06-07 Thread Tobias Burnus via Gcc-cvs-wwwdocs
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gcc-wwwdocs".

The branch, master has been updated
   via  4260d675af42b9c97e29818ab3b3154d27103d49 (commit)
  from  8507122b38e6b60e8f2f3c8cd339d4f318377203 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -
commit 4260d675af42b9c97e29818ab3b3154d27103d49
Author: Tobias Burnus 
Date:   Fri Jun 7 10:06:52 2024 +0200

gcc-15/changes.html + projects/gomp: update for new OpenMP features

GCC 15 now supports unified-shared memory and the tile/unroll constructs
in OpenMP.

diff --git a/htdocs/gcc-15/changes.html b/htdocs/gcc-15/changes.html
index 0ea7bdec..a121f40a 100644
--- a/htdocs/gcc-15/changes.html
+++ b/htdocs/gcc-15/changes.html
@@ -40,6 +40,24 @@ a work-in-progress.
 
 New Languages and Language specific improvements
 
+
+  OpenMP
+  
+
+  Support for unified-shared memory has been added for some AMD and Nvidia
+  GPU devices, enabled when using the unified_shared_memory
+  clause to the requires directive. For details,
+  see the offload-target specifics section in the
+  https://gcc.gnu.org/onlinedocs/libgomp/Offload-Target-Specifics.html;
+  >GNU Offloading and Multi Processing Runtime Library Manual.
+
+
+  OpenMP 5.1: The unroll and tile
+  loop-transformation constructs are now supported.
+
+  
+
+
 
 
 
diff --git a/htdocs/projects/gomp/index.html b/htdocs/projects/gomp/index.html
index 94bda5ff..d1765fc3 100644
--- a/htdocs/projects/gomp/index.html
+++ b/htdocs/projects/gomp/index.html
@@ -313,18 +313,21 @@ than listed, depending on resolved corner cases and 
optimizations.
   
   
 requires directive
-
+
   GCC9
   GCC12
   GCC13
-  GCC14
+  GCC14
+  GCC15
 
 
   (atomic_default_mem_order)
   (dynamic_allocators)
   complete but no non-host devices provides unified_address or
   unified_shared_memory
-  complete but no non-host devices provides 
unified_shared_memory
+  complete but no non-host devices provides 
unified_shared_memory
+  complete; see also https://gcc.gnu.org/onlinedocs/libgomp/Offload-Target-Specifics.html;>
+  Offload-Target Specifics
 
   
   
@@ -706,7 +709,7 @@ than listed, depending on resolved corner cases and 
optimizations.
   
   
 Loop transformation constructs
-No
+GCC15
 
   
   

---

Summary of changes:
 htdocs/gcc-15/changes.html  | 18 ++
 htdocs/projects/gomp/index.html | 11 +++
 2 files changed, 25 insertions(+), 4 deletions(-)


hooks/post-receive
-- 
gcc-wwwdocs


gcc-wwwdocs branch master updated. 8507122b38e6b60e8f2f3c8cd339d4f318377203

2024-06-07 Thread Tobias Burnus via Gcc-cvs-wwwdocs
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gcc-wwwdocs".

The branch, master has been updated
   via  8507122b38e6b60e8f2f3c8cd339d4f318377203 (commit)
  from  1db5b34eb8cf47f070f643f993d835149bce2ec7 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -
commit 8507122b38e6b60e8f2f3c8cd339d4f318377203
Author: Tobias Burnus 
Date:   Fri Jun 7 09:58:52 2024 +0200

gcc-15/changes.html (nvptx): Constructors are now supported

diff --git a/htdocs/gcc-15/changes.html b/htdocs/gcc-15/changes.html
index b59fd3be..0ea7bdec 100644
--- a/htdocs/gcc-15/changes.html
+++ b/htdocs/gcc-15/changes.html
@@ -85,7 +85,14 @@ a work-in-progress.
 
 
 
-
+NVPTX
+
+
+  GCC's nvptx target now supports constructors and destructors.
+  For this, a recent version of https://gcc.gnu.org/install/specific.html#nvptx-x-none;
+  >nvptx-tools is required.
+
 
 
 

---

Summary of changes:
 htdocs/gcc-15/changes.html | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)


hooks/post-receive
-- 
gcc-wwwdocs


Re: [wwwdocs] gcc-15/changes.html + projects/gomp: update for new OpenMP features

2024-06-06 Thread Tobias Burnus

Hi Gerald,

Gerald Pfeifer wrote:

+++ b/htdocs/gcc-15/changes.html
+
+  https://gcc.gnu.org/projects/gomp/;>OpenMP

Can you please make this a relative link, i.e. "../projects/gomp/"?


Good point. I thought such links should be absolute because of 
(www.)GNU.org, i.e.


https://www.gnu.org/software/gcc/releases.html

... but also that page has https://www.gnu.org/software/gcc/projects/gomp/

GNU.org does not have the documentation, but going to 
https://www.gnu.org/software/gcc/onlinedocs/ or a subpage redirects (302 
temporary redirect) to the GCC website. Likewise for '../git' but for 
'../wiki' it has a HTTP 404 not found; fortunately, ../wiki/ works.


I think there are plenty of links which could be relative ones but are 
absolute ones.


Looks like a janitorial task to fix the absolute links, possibly 
excluding those with /git, /onlinedocs, /wiki – or assuming that the 
main page is GCC.gnu.org, relying on the redirects.


In any case, those links are probably broken on GNU.org:

htdocs/gcc-14/porting_to.html:href="/onlinedocs/gcc-14.1.0/gcc/Diagnostic-Pragmas.html">#pragma 
GCC diagnostic warning


htdocs/gcc-5/changes.html:    A href="/onlinedocs/libstdc++/manual/using_dual_abi.html">Dual


* * *


+
+  OpenMP 5.1: The unroll and tile
+  loop-transformation constructs are now supported.
+

I'm thinking "loop transformation" in English? Or is this a specific term
from the standard?


Loop transformation happens at the end. But e.g "(#pragma omp) unroll 
full" is a directive and, e.g.


#pragma omp unroll partial(2)

for (int i=0; i < n; i++)

a[i] = 5;

is a construct (= directive + structured block (if any) + end directive 
(if any)).


Tobias



Re: [committed] nvptx, libgfortran: Switch out of "minimal" mode

2024-06-06 Thread Tobias Burnus

Sandra Loosemore wrote:

On 6/6/24 06:06, Tobias Burnus wrote:
+@item I/O within OpenMP target regions and OpenACC compute regions 
is supported

+  using the C library @code{printf} functions.
+  Additionally, the Fortran @code{print}/@code{write} 
statements are
+  supported within OpenMP target regions, but not yet OpenACC 
compute
+  regions.  @c The latter needs 
'GOMP_NVPTX_NATIVE_GPU_THREAD_STACK_SIZE'.




I think an "in" (or 'within') is missing before OpenACC.


Yes, "...not yet within OpenACC compute regions", please.


Thanks! Committed as https://gcc.gnu.org/r15-1072-g423522aacd9f30

Tobias



[gcc r15-1072] libgomp.texi (nvptx): Add missing preposition

2024-06-06 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:423522aacd9f30bb75aa77d38fccb630bfc4c98a

commit r15-1072-g423522aacd9f30bb75aa77d38fccb630bfc4c98a
Author: Tobias Burnus 
Date:   Thu Jun 6 16:37:55 2024 +0200

libgomp.texi (nvptx): Add missing preposition

libgomp/
* libgomp.texi (nvptx): Add missing preposition.

Diff:
---
 libgomp/libgomp.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index eb608915938..73e8e39ca42 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -6432,7 +6432,7 @@ The implementation remark:
 @item I/O within OpenMP target regions and OpenACC compute regions is supported
   using the C library @code{printf} functions.
   Additionally, the Fortran @code{print}/@code{write} statements are
-  supported within OpenMP target regions, but not yet OpenACC compute
+  supported within OpenMP target regions, but not yet within OpenACC 
compute
   regions.  @c The latter needs 'GOMP_NVPTX_NATIVE_GPU_THREAD_STACK_SIZE'.
 @item Compilation OpenMP code that contains @code{requires reverse_offload}
   requires at least @code{-march=sm_35}, compiling for @code{-march=sm_30}


Re: [committed] nvptx, libgfortran: Switch out of "minimal" mode

2024-06-06 Thread Tobias Burnus

Hi Thomas,

regarding the commit r15-1070-g3a4775d4403f2e / https://gcc.gnu.org/r15-1070

First, thanks for adding I/O support to nvptx offloading.

I have a wording nit, to be confirmed by a native speaker:


--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi

...

+@item I/O within OpenMP target regions and OpenACC compute regions is 
supported

+  using the C library @code{printf} functions.
+  Additionally, the Fortran @code{print}/@code{write} statements are
+  supported within OpenMP target regions, but not yet OpenACC compute
+  regions.  @c The latter needs 
'GOMP_NVPTX_NATIVE_GPU_THREAD_STACK_SIZE'.




I think an "in" (or 'within') is missing before OpenACC.

Otherwise, it seemed to be fine at a glance – and I am happy that this 
feature now finally works :-)


Hooray, no longer using reverse offload ("!$omp target 
device(ancestor:1)") for Fortran I/O when debugging.


Thanks,

Tobias


Re: [PATCH v4 1/5] libgomp, openmp: Add ompx_pinned_mem_alloc

2024-06-06 Thread Tobias Burnus

Hi Andrew, hi Jakub, hello world,

Andrew Stubbs wrote:


Compared to the previous v3 posting of this patch, the enumeration of
the "ompx" allocators have been moved to start at "100"


100 is a bad value - as can be seen below.

As Jakub suggested at 
https://gcc.gnu.org/pipermail/gcc-patches/2023-December/640432.html
"given that LLVM uses 100-102 range, perhaps pick a different one, 200 or 150"

(I know that the first review email suggested 100.)


This creates a new predefined allocator as a shortcut for using pinned
memory with OpenMP.  The name uses the OpenMP extension space and is
intended to be consistent with other OpenMP implementations currently in
development.


Namely: ompx_pinned_mem_alloc

RFC: Should we use this name or - similar to LLVM - prefix this by
a vendor prefix instead (gnu_omp_ or gcc_omp_ instead of ompx_)?

IMHO it is fine to use ompx_ for pinned as the semantic is clear
and should be compatible with IBM and AMD.

For other additional memspaces / allocators, I am less sure, i.e.
on OG13 there are:
- ompx_unified_shared_mem_space, ompx_host_mem_space
- ompx_unified_shared_mem_alloc, ompx_host_mem_alloc

(BTW: In light of TR13 naming, the USM one could be
..._devices_all_mem_{alloc,space}, just to start some bikeshedding
or following LLVM + Intel '…target_{host,shared}…'.)

* * *

Looking at other compilers:

IBM's compiler, https://www.ibm.com/docs/en/SSXVZZ_16.1.1/pdf/compiler.pdf , 
has:
- ompx_pinned_mem_alloc, tagged as IBM extension and otherwise without 
documenting it further

Checking omp.h, they define it as:
  ompx_pinned_mem_alloc = 9, /* Preview of host pinned memory support */
and additionally have:
  LOMP_MAX_MEM_ALLOC = 1024,

AMD's compiler based on clang has:
  /* Preview of pinned memory support */
  ompx_pinned_mem_alloc = 120,
in addition to the LLVM defines shown below.

Regarding LLVM:
- they don't offer 'pinned'
- they use the prefix 'llvm_omp' not 'ompx'

Namely:
typedef enum omp_allocator_handle_t
...
  llvm_omp_target_host_mem_alloc = 100,
  llvm_omp_target_shared_mem_alloc = 101,
  llvm_omp_target_device_mem_alloc = 102,
...
typedef enum omp_memspace_handle_t
...
  llvm_omp_target_host_mem_space = 100,
  llvm_omp_target_shared_mem_space = 101,
  llvm_omp_target_device_mem_space = 102,

Remark: I did not find a documentation - and while I
understand in principle host and shared, I wonder how
LLVM handles 'device_mem_space' when there is more than
one device.

BTW: OpenMP TR13 avoids this issue by adding two sets of
API routines. Namely:

First, for memspaces,
- omp_get_{device,devices}_memspace
- omp_get_{device,devices}_and_host_memspace
- omp_get_devices_all_memspace

and, secondly, for allocators:
- omp_get_{device,devices}_allocator
- omp_get_{device,devices}_and_host_allocator
- omp_get_devices_all_allocator

where omp_get_device_* takes a single device number and
omp_get_devices_* a list of device numbers while _and_host
automatically adds the initial device to the list.

* * *

Looking at Intel, they even use extensions without prefix:

omp_target_{host,shared,device}_mem_{space,alloc}

and contrary to LLVM they document it with the semantic, cf.
https://www.intel.com/content/www/us/en/docs/dpcpp-cpp-compiler/developer-guide-reference/2023-1/openmp-memory-spaces-and-allocators.html

* * *


The allocator is equivalent to using a custom allocator with the pinned
trait and the null fallback trait.


...


diff --git a/libgomp/allocator.c b/libgomp/allocator.c
index cdedc7d80e9..18e3f525ec6 100644
--- a/libgomp/allocator.c
+++ b/libgomp/allocator.c
@@ -99,6 +99,8 @@ GOMP_is_alloc (void *ptr)


...


   #define ARRAY_SIZE(A) (sizeof (A) / sizeof ((A)[0]))
-_Static_assert (ARRAY_SIZE (predefined_alloc_mapping)
+_Static_assert (ARRAY_SIZE (predefined_omp_alloc_mapping)
== omp_max_predefined_alloc + 1,
-   "predefined_alloc_mapping must match omp_memspace_handle_t");
+   "predefined_omp_alloc_mapping must match 
omp_memspace_handle_t");
+#define ARRAY_SIZE(A) (sizeof (A) / sizeof ((A)[0]))


I am surprised that this compiles: Why do you re-#define this macro?

* * *


--- a/libgomp/omp.h.in
+++ b/libgomp/omp.h.in
@@ -134,6 +134,7 @@ typedef enum omp_allocator_handle_t __GOMP_UINTPTR_T_ENUM
 omp_cgroup_mem_alloc = 6,
 omp_pteam_mem_alloc = 7,
 omp_thread_mem_alloc = 8,
+  ompx_pinned_mem_alloc = 100,


See remark regarding "100" at the top of this email.


--- a/libgomp/omp_lib.f90.in
+++ b/libgomp/omp_lib.f90.in
+integer (kind=omp_allocator_handle_kind), &
+ parameter :: ompx_pinned_mem_alloc = 100


Likewise.

* * *

Why didn't you also update omp_lib.h.in?

* * *

I think you really want to update the checking code inside GCC itself,

i.e. for Fortran:

3 |   !$omp allocate(a) allocator(100)

  | 21

Error: Predefined allocator required in ALLOCATOR clause at (1) as the list 
item 'a' at (2) has the 

[wwwdocs] gcc-15/changes.html + projects/gomp: update for new OpenMP features

2024-06-06 Thread Tobias Burnus

GCC 15 now supports unified-shared memory and the tile/unroll constructs
in OpenMP.

Updates https://gcc.gnu.org/gcc-15/changes.html
and https://gcc.gnu.org/projects/gomp/

Comments?

Tobias
gcc-15/changes.html + projects/gomp: update for new OpenMP features

GCC 15 now supports unified-shared memory and the tile/unroll constructs
in OpenMP.

 htdocs/gcc-15/changes.html  | 27 ++-
 htdocs/projects/gomp/index.html | 11 +++
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/htdocs/gcc-15/changes.html b/htdocs/gcc-15/changes.html
index b59fd3be..94528ebd 100644
--- a/htdocs/gcc-15/changes.html
+++ b/htdocs/gcc-15/changes.html
@@ -40,6 +40,24 @@ a work-in-progress.
 
 New Languages and Language specific improvements
 
+
+  https://gcc.gnu.org/projects/gomp/;>OpenMP
+  
+
+  Support for unified-shared memory has been added for some AMD and Nvidia
+  GPUs devices, enabled only when using the
+  unified_shared_memory clause to the requires
+  directive. For details, see the offload-target specifics section in the
+  https://gcc.gnu.org/onlinedocs/libgomp/Offload-Target-Specifics.html;
+  >GNU Offloading and Multi Processing Runtime Library Manual.
+
+
+  OpenMP 5.1: The unroll and tile
+  loop-transformation constructs are now supported.
+
+  
+
+
 
 
 
diff --git a/htdocs/projects/gomp/index.html b/htdocs/projects/gomp/index.html
index 94bda5ff..d1765fc3 100644
--- a/htdocs/projects/gomp/index.html
+++ b/htdocs/projects/gomp/index.html
@@ -313,18 +313,21 @@ than listed, depending on resolved corner cases and optimizations.
   
   
 requires directive
-
+
   GCC9
   GCC12
   GCC13
-  GCC14
+  GCC14
+  GCC15
 
 
   (atomic_default_mem_order)
   (dynamic_allocators)
   complete but no non-host devices provides unified_address or
   unified_shared_memory
-  complete but no non-host devices provides unified_shared_memory
+  complete but no non-host devices provides unified_shared_memory
+  complete; see also https://gcc.gnu.org/onlinedocs/libgomp/Offload-Target-Specifics.html;>
+  Offload-Target Specifics
 
   
   
@@ -706,7 +709,7 @@ than listed, depending on resolved corner cases and optimizations.
   
   
 Loop transformation constructs
-No
+GCC15
 
   
   


*ping* – Re: [wwwdocs] gcc-15/changes.html (nvptx): Constructors are now supported

2024-06-05 Thread Tobias Burnus
Regarding 
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/653417.html , are 
there any …


Tobias Burnus wrote:

Comments or fine as is?


Tobias



Re: [patch] libgomp: Enable USM for some nvptx devices

2024-06-05 Thread Tobias Burnus

Hi Andrew, hello world,

Now with AMD Instinct MI200 data - see below.

And a better look at the numbers. In terms of USM,
there does not seem to be any clear winner of both
approaches. If we want to draw conclusions, definitely
more runs are needed (statistics):

The runs below show that the differences between runs
can be larger than the effect of mapping vs. USM.
And that OG13's USM was 40% slower on MI210
(compared with mainline or OG13 'map') while
mainline's USM is about as fast as 'map' (OG13 or mainline)
is not consistent with the MI250X result, where both USM are
slower, with mainline's USM being much slower with ~30%
than OG13 with 12%.



Tobias Burnus wrote:


I have now tried it on my laptop with 
BabelStream,https://github.com/UoB-HPC/BabelStream

Compiling with:
echo "#pragma omp requires unified_shared_memory" > omp-usm.h
cmake -DMODEL=omp -DCMAKE_CXX_COMPILER=$HOME/projects/gcc-trunk-offload/bin/g++ 
\
   -DCXX_EXTRA_FLAGS="-g -include ../omp-usm.h -foffload=nvptx-none 
-fopenmp" -DOFFLOAD=ON ..

(and the variants: no -include (→ map) + -DOFFLOAD=OFF (= host), and with 
hostfallback,
via env var (or usm-14 due to lacking support).)

For mainline, I get (either with libgomp.so of mainline or GCC 14, i.e. w/o USM 
support):
host-14.log 195.84user 0.94system 0 11.20elapsed 1755%CPU 
(0avgtext+0avgdata 1583268maxresident)k
host-mainline.log   200.16user 1.00system 0 11.89elapsed 1691%CPU 
(0avgtext+0avgdata 1583272maxresident)k
hostfallback-mainline.log   288.99user 4.57system 0 19.39elapsed 1513%CPU 
(0avgtext+0avgdata 1583972maxresident)k
usm-14.log  279.91user 5.38system 0 19.57elapsed 1457%CPU 
(0avgtext+0avgdata 1590168maxresident)k
map-14.log  4.17user 0.45system 0   03.58elapsed 129%CPU 
(0avgtext+0avgdata 1691152maxresident)k
map-mainline.log    4.15user 0.44system 0   03.58elapsed 128%CPU 
(0avgtext+0avgdata 1691260maxresident)k
usm-mainline.log    3.63user 1.96system 0   03.88elapsed 144%CPU 
(0avgtext+0avgdata 1692068maxresident)k

Thus: GPU is faster than host, host fallback takes 40% longer than doing host 
compilation.
USM is 15% faster than mapping.


Correction: I shouldn't look at user time but at elapsed time. For the 
latter, USM is 8% slower on mainline; hostfallback is ~70% slower than 
host execution.



With OG13, the pattern is similar, except that USM is only 3% faster.
Here, USM (elapsed) is 2.5% faster. It is a bit difficult to compare the 
results as OG13 is faster for mapping and USM, which makes 
distinguishing OG13 vs mainline performance and the two different USM 
approaches difficult.

host-og13.log   191.51user 0.70system 0 09.80elapsed 1960%CPU 
(0avgtext+0avgdata 1583280maxresident)k
map-hostfallback-og13.log   205.12user 1.09system 0 10.82elapsed 1905%CPU 
(0avgtext+0avgdata 1585092maxresident)k
usm-hostfallback-og13.log   338.82user 4.60system 0 19.34elapsed 1775%CPU 
(0avgtext+0avgdata 1584580maxresident)k
map-og13.log4.43user 0.42system 0   03.59elapsed 135%CPU 
(0avgtext+0avgdata 1692692maxresident)k
usm-og13.log4.31user 1.18system 0   03.68elapsed 149%CPU 
(0avgtext+0avgdata 1686256maxresident)k

* * *


As IT issues are now solved:

(A) On  AMD Instinct MI210 (gfx90a)

The host fallback is here very slow with elapsed time 24s vs. 1.6s for host 
execution.
map and USM seem to be in the same ballpark.
For two 'map' runs, I see a difference of 8%, the USM times are between those 
map results.

I see similar results for OG13 as for mainline, except for USM which is ~40% 
slower (elapsed time)
than map (OG13 or mainline - or mainline's USM).

host-mainline-2.log 194.00user 7.21system 0 01.44elapsed 13954%CPU 
(0avgtext+0avgdata 1320960maxresident)k
host-mainline.log   221.53user 5.58system 0 01.78elapsed 12716%CPU 
(0avgtext+0avgdata 1318912maxresident)k
hostfallback-mainline-1.log 3073.35user 146.22system 0  24.25elapsed 
13272%CPU (0avgtext+0avgdata 1644544maxresident)k
hostfallback-mainline-2.log 2268.62user 146.13system 0  23.39elapsed 
10320%CPU (0avgtext+0avgdata 1650544maxresident)k
map-mainline-1.log  5.38user 16.16system 0  03.00elapsed 716%CPU 
(0avgtext+0avgdata 1714936maxresident)k
map-mainline-2.log  5.12user 15.93system 0  02.74elapsed 768%CPU 
(0avgtext+0avgdata 1714932maxresident)k
usm-mainline-1.log  7.61user 2.30system 0   02.89elapsed 342%CPU 
(0avgtext+0avgdata 1716984maxresident)k
usm-mainline-2.log  7.75user 2.92system 0   02.89elapsed 369%CPU 
(0avgtext+0avgdata 1716980maxresident)k

host-og13-1.log 213.69user 6.37system 0 01.56elapsed 14026%CPU 
(0avgtext+0avgdata 1316864maxresident)k
hostfallback-map-og13-1.log 3026.68user 123.77system 0  23.69elapsed 
13295%CPU (0avgtext+0avgdata 1642496maxresident)k
hostfallback-map-og1

Re: [patch] libgomp: Enable USM for some nvptx devices

2024-06-04 Thread Tobias Burnus

Andrew Stubbs wrote:


PS: I would love to do some comparisons [...]

Actually, I think testing only data transfer is fine for this, but we
might like to try some different access patterns, besides straight
linear copies.


I have now tried it on my laptop with 
BabelStream,https://github.com/UoB-HPC/BabelStream

Compiling with:
echo "#pragma omp requires unified_shared_memory" > omp-usm.h
cmake -DMODEL=omp -DCMAKE_CXX_COMPILER=$HOME/projects/gcc-trunk-offload/bin/g++ 
\
  -DCXX_EXTRA_FLAGS="-g -include ../omp-usm.h -foffload=nvptx-none 
-fopenmp" -DOFFLOAD=ON ..

(and the variants: no -include (→ map) + -DOFFLOAD=OFF (= host), and with 
hostfallback,
via env var (or usm-14 due to lacking support).)

For mainline, I get (either with libgomp.so of mainline or GCC 14, i.e. w/o USM 
support):

host-14.log 195.84user 0.94system 0 11.20elapsed 1755%CPU 
(0avgtext+0avgdata 1583268maxresident)k
host-mainline.log   200.16user 1.00system 0 11.89elapsed 1691%CPU 
(0avgtext+0avgdata 1583272maxresident)k
hostfallback-mainline.log   288.99user 4.57system 0 19.39elapsed 1513%CPU 
(0avgtext+0avgdata 1583972maxresident)k
usm-14.log  279.91user 5.38system 0 19.57elapsed 1457%CPU 
(0avgtext+0avgdata 1590168maxresident)k
map-14.log  4.17user 0.45system 0   03.58elapsed 129%CPU 
(0avgtext+0avgdata 1691152maxresident)k
map-mainline.log    4.15user 0.44system 0   03.58elapsed 128%CPU 
(0avgtext+0avgdata 1691260maxresident)k
usm-mainline.log    3.63user 1.96system 0   03.88elapsed 144%CPU 
(0avgtext+0avgdata 1692068maxresident)k

Thus: GPU is faster than host, host fallback takes 40% longer than doing host 
compilation.
USM is 15% faster than mapping.


With OG13, the pattern is similar, except that USM is only 3% faster. Thus, HMM 
seems to win on my laptop.

host-og13.log   191.51user 0.70system 0 09.80elapsed 1960%CPU 
(0avgtext+0avgdata 1583280maxresident)k
map-hostfallback-og13.log   205.12user 1.09system 0 10.82elapsed 1905%CPU 
(0avgtext+0avgdata 1585092maxresident)k
usm-hostfallback-og13.log   338.82user 4.60system 0 19.34elapsed 1775%CPU 
(0avgtext+0avgdata 1584580maxresident)k
map-og13.log4.43user 0.42system 0   03.59elapsed 135%CPU 
(0avgtext+0avgdata 1692692maxresident)k
usm-og13.log4.31user 1.18system 0   03.68elapsed 149%CPU 
(0avgtext+0avgdata 1686256maxresident)k

* * *

I planned to try an AMD Instinct MI200 device, but due to two IT issues, I 
cannot.
(Shutdown for maintenance of the MI250X system and an NFS issue for the MI210 
run,
but being unable to reboot due to the absence of a colleague having tons of 
editors
still open).

Tobias


Re: [patch] libgomp: Enable USM for some nvptx devices

2024-06-03 Thread Tobias Burnus

Andrew Stubbs wrote:

On 03/06/2024 17:46, Tobias Burnus wrote:

Andrew Stubbs wrote:

+    /* If USM has been requested and is supported by all devices
+   of this type, set the capability accordingly. */
+    if (omp_requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
+  current_device.capabilities |= GOMP_OFFLOAD_CAP_SHARED_MEM;
+


This breaks my USM patches that add the omp_alloc support (because 
it now short-circuits all of those code-paths),


which I believe is fine. Your USM patches are for pseudo-USM, i.e. a 
(useful) bandaid for systems where the memory is not truly 
unified-shared memory but only specially tagged host memory is device 
accessible. (e.g. only memory allocated via cuMemAllocManaged) — And, 
quite similar, for -foffload-memory=pinned.


Er, no.

The default do-nothing USM uses slow uncachable PCI memory accesses 
(on devices that don't have truly shared memory, like APUs).


I have no idea what a "default do nothing USM" is – and using the PCI-E 
to transfer the data is the only option unless there is either a common 
memory controller or some other interconnect (e.g. Infinity Fabric).


However, your description sounds as if you talk about pinned memory – 
which by construction cannot migrate – and not about managed memory, 
which is one of the main approaches for USM – especially as that's how 
HMM works and as it avoids to transfer any memory access.


If you use a Linux kernel with HMM and have support for it, the default 
is that upon device access, the page migrates to the GPU (using, e.g. 
PCI-E) and then stays there until the host accesses that memory page 
again, triggering a page fault and transfer back. That's the whole idea 
of HMM and works similar to the migrate to disk feature (aka swapping), 
cf. https://docs.kernel.org/mm/hmm.html


That's the very same behavior as with hipMallocManaged with XNACK 
enabled according to 
https://rocm.docs.amd.com/en/develop/conceptual/gpu-memory.html


As PowerPC + Volta (+ normal kernel) does not support USM but a system 
with Nvlink does, I bet that on such a system, the memory stays on the 
host and Nvlink does the remote access, but I don't know how Nvlink 
handles caching. (The feature flags state that direct host-memory access 
from the device is possible.)


By contrast, for my laptop GPU (Nvidia RTX A1000) with open kernel 
drivers + CUDA drivers, I bet the memory migration will happen – 
especially as the feature flags state that direct host-memory access is not possible.


* * *

If host and device access data on the same memory page, page migration 
forth and back will happen continuously, which is very slow.


Also slow is if data is spread over many pages as one keeps getting 
page faults until the data is finally completely migrated. The solution 
in that case is a large page such that the data is transferred in 
one/few large chunks.


In general using manual allocation (x = omp_alloc(...)) with a suitable 
allocator can manually avoid the problem by using pinning or large pages 
or … Without knowing the algorithm it is hard to have a generic solution.


If such a concurrent access issue occurs for compiler generated 
code or with the run-time library, we should definitely try to fix it; 
for user code, it is probably hopeless in the generic case.


* * *

I actually tried to find an OpenMP target-offload benchmark, possibly 
for USM, but I failed. Most seem to be either not available or seriously 
broken – when testing starts by fixing OpenMP syntax bugs, it does not 
increase the trust in the testcase. — Can you suggest a testcase?


* * *

The CUDA Managed Memory and AMD Coarse Grained memory implementation 
uses proper page migration and permits full-speed memory access on the 
device (just don't thrash the pages too fast).


As written, in my understanding that is what happens with HMM kernel 
support for any memory that is not explicitly pinned. The only extra 
trick an implementation can play is pinning the page – such that it 
knows that the host memory does not change (e.g. won't migrate to the 
other NUMA memory of the CPU or to swap space) such that the memory can 
be directly accessed.


I am pretty sure that's the reason, e.g., CUDA pinned memory is faster – 
and it might also help with HMM migration if the destination is known 
not to change; no idea whether the managed memory routines play such 
tricks or not.


Another optimization opportunity exists if it is known that the memory 
won't be accessed by host until the kernel ends, but I don't see this 
guaranteed in general in user code.


* * *

On AMD MI200, your check broke my USM testcases (because the code 
they were testing isn't active).  This is a serious performance problem.


"I need more data." — First, a valid USM testcase should not be broken 
in the mainline. Secondly, I don't see how a generic testcase can have a 
performance issue when USM works. And, I didn't see a tes

Re: [patch] libgomp: Enable USM for some nvptx devices

2024-06-03 Thread Tobias Burnus

Andrew Stubbs wrote:

+    /* If USM has been requested and is supported by all devices
+   of this type, set the capability accordingly.  */
+    if (omp_requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
+  current_device.capabilities |= GOMP_OFFLOAD_CAP_SHARED_MEM;
+


This breaks my USM patches that add the omp_alloc support (because it 
now short-circuits all of those code-paths),


which I believe is fine. Your USM patches are for pseudo-USM, i.e. a 
(useful) bandaid for systems where the memory is not truly 
unified-shared memory but only specially tagged host memory is device 
accessible. (e.g. only memory allocated via cuMemAllocManaged) — And, 
quite similar, for -foffload-memory=pinned.


I think if a user wants to have pseudo USM – and does so by passing 
-foffload-memory=unified – we can add another flag to the internal 
omp_requires_mask. - By passing this option, a user should then also be 
aware of all the unavoidable special-case issues of pseudo-USM and 
cannot complain if they run into those.


If not, well, then the user either gets true USM (if supported) - or 
host fallback. Either of it is perfectly fine.


With -foffload-memory=unified, the compiler can then add all the 
omp_alloc calls – and, e.g., set a new GOMP_REQUIRES_OFFLOAD_MANAGED 
flag. If that's set, we wouldn't do the line above quoted capability 
setting in libgomp/target.c.


For nvidia, GOMP_REQUIRES_OFFLOAD_MANAGED probably requires 
CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, i.e. when 0 then we 
probably want to return -1 also for -foffload-memory=unified. - A quick 
check shows that Tesla K20 (Kepler, sm_35) has 0 while Volta, Ada, 
Ampere (sm_70, sm_82, sm_89) have 1. (I recall using managed memory on 
an old system; page migration to the device worked fine, but an on-host 
access while the kernel was still running crashed the program.)

For amdgcn, my impression is that we don't need to handle 
-foffload-memory=unified as only the MI200 series (+ APUs) supports this 
well, but MI200 also supports true USM (with page migration; for APU it 
makes even less sense). - But, of course, we still may. — Auto-setting 
HSA_XNACK could still be done for MI200, but I wonder how to distinguish 
MI300X vs. MI300A, but it probably doesn't harm (nor help) to set 
HSA_XNACK for APUs …



and it's just not true for devices where all host memory isn't 
magically addressable on the device.

Is there another way to detect truly shared memory?


Do you have any indication that the current checks become true when the 
memory is not accessible?


Tobias


[committed] install.texi (gcn): Fix date of recommended newlib version

2024-06-03 Thread Tobias Burnus

Somehow, I was one year ahead. The commit wasn't 2025-03-25 but in 2024.

Committed as obvious, also to avoid future confusions.

Tobias
commit 16fb3abf0fb4b88ee0e27732db217909fa429a81
Author: Tobias Burnus 
Date:   Mon Jun 3 12:56:39 2024 +0200

install.texi (gcn): Fix date of recommended newlib version

gcc/ChangeLog:

* doc/install.texi (gcn): Fix date of recommended newlib version.

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 42b462a2ce2..c781646ac1f 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -3950,7 +3950,7 @@ by specifying a @code{--with-multilib-list=} that does not list @code{gfx1100}
 and @code{gfx1103}.
 
 Use Newlib (4.3.0 or newer; 4.4.0 contains some improvements and git commit
-7dd4eb1db (2025-03-25, post-4.4.0) fixes device console output for GFX10 and
+7dd4eb1db (2024-03-25, post-4.4.0) fixes device console output for GFX10 and
 GFX11 devices).
 
 To run the binaries, install the HSA Runtime from the


[gcc r15-990] install.texi (gcn): Fix date of recommended newlib version

2024-06-03 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:16fb3abf0fb4b88ee0e27732db217909fa429a81

commit r15-990-g16fb3abf0fb4b88ee0e27732db217909fa429a81
Author: Tobias Burnus 
Date:   Mon Jun 3 12:56:39 2024 +0200

install.texi (gcn): Fix date of recommended newlib version

gcc/ChangeLog:

* doc/install.texi (gcn): Fix date of recommended newlib version.

Diff:
---
 gcc/doc/install.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 42b462a2ce2..c781646ac1f 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -3950,7 +3950,7 @@ by specifying a @code{--with-multilib-list=} that does 
not list @code{gfx1100}
 and @code{gfx1103}.
 
 Use Newlib (4.3.0 or newer; 4.4.0 contains some improvements and git commit
-7dd4eb1db (2025-03-25, post-4.4.0) fixes device console output for GFX10 and
+7dd4eb1db (2024-03-25, post-4.4.0) fixes device console output for GFX10 and
 GFX11 devices).
 
 To run the binaries, install the HSA Runtime from the


Re: [patch] install.texi (nvptx): Recommend nvptx-tools 2024-05-30

2024-06-03 Thread Tobias Burnus

Richard Biener wrote:

install.texi also has the issue that it's not pre-packaged in a
easy to discover and readable file in the release tarballs and that
the online version is only for trunk.


I always wondered why it is not included at 
https://gcc.gnu.org/onlinedocs/ — it would then also be linked from, 
e.g., https://gcc.gnu.org/gcc-14/index.html


Tobias



Re: [patch] install.texi (nvptx): Recommend nvptx-tools 2024-05-30

2024-06-03 Thread Tobias Burnus

Richard Biener wrote:

On Mon, 3 Jun 2024, Tobias Burnus wrote:

Thomas Schwinge wrote:

In the following, I have then reconsidered that stance; we may actually
"Implement global constructor, destructor support in a conceptually
simpler way than using 'collect2' (the program): implement the respective
functionality in the nvptx-tools 'ld'".  The latter is
<https://github.com/SourceryTools/nvptx-tools/commit/96f8fc59a757767b9e98157d95c21e9fef22a93b>
"ld: Global constructor/destructor support".

The attached patch makes clearer which version should be
installed by recommending this patch (= latest nvptx-tools)
in install.texi.

Can we simply say "newest" where I guess referring to a github repo
already implies this?


Good question. The problem I see with just referring to a repository 
(even with newest) often means: yes, that software I have (whatever 
version). While if some reference goes to a 2024 version, I might not 
know what version I have but likely an older version → I will update.


Admittedly, as people tend to *not* read the documentation, this 
approach might fail as well. But, maybe, it is sufficient to update GCC 
15's release notes?*


It won't help those not reading the release notes before building 
and the wording* had to be changed a bit as install.texi no longer 
states what version should be used, but it would be an alternative.


(*) https://gcc.gnu.org/pipermail/gcc-patches/2024-June/653417.html

Tobias



[wwwdocs] gcc-15/changes.html (nvptx): Constructors are now supported

2024-06-03 Thread Tobias Burnus

Comments or fine as is?

Tobias
gcc-15/changes.html (nvptx): Constructors are now supported

diff --git a/htdocs/gcc-15/changes.html b/htdocs/gcc-15/changes.html
index b59fd3be..b3305079 100644
--- a/htdocs/gcc-15/changes.html
+++ b/htdocs/gcc-15/changes.html
@@ -85,7 +103,14 @@ a work-in-progress.
 
 
 
-
+NVPTX
+
+
+  GCC's nvptx target now supports constructors and destructors;
+  for this, a recent version of nvptx-tools is https://gcc.gnu.org/install/specific.html#nvptx-x-none;
+  >required.
+
 
 
 



[nvptx] *ping* - [patch] [gcn][nvptx] Add warning to mkoffload for 32bit host code

2024-06-03 Thread Tobias Burnus

Hi Thomas, hi Tom,

any comment regarding this patch?
 https://gcc.gnu.org/pipermail/gcc-patches/2024-April/650007.html

Tobias

Am 25.04.24 um 12:51 schrieb Tobias Burnus:

Motivated by a surprise of a colleague that with -m32,
no offload dumps were created; that's because mkoffload
does not process host binaries when they are 32bit (i.e. ilp32).

Internally, that is done as follows: The host compiler passes to
'mkoffload' the used host ABI, i.e. -foffload-abi=ilp32 or -foffload-abi=lp64

That's done via TARGET_OFFLOAD_OPTIONS, which is supported by aarch64, i386, 
and rs6000.

While it is sensible (albeit not strictly required) that GCC requires that
the host and device side agree and that only 64bit is implemented for the
device side, it can be confusing that silently no offloading code is generated.


Hence, I propose to print a warning in that case - as implemented in the 
attached patch:

$ gcc -fopenmp -m32 test.c
nvptx mkoffload: warning: offload code generation skipped: offloading with 
32-bit host code is currently not supported
gcn mkoffload: warning: offload code generation skipped: offloading with 32-bit 
host code is currently not supported

* * *

This shouldn't have any effect on offload builds using -m64
and non-offload builds – while several testcases already have
issues with '-m32' when offloading is enabled or an offloading
device is available.

To make it not worse, this patch adds some pruning and for
a subset of the failing testcases, I added code to avoid FAILs.
There are some more fails, but those aren't new.

Comments, remarks, suggestions?
Is the mkoffload.cc part okay?

Tobias



[patch] install.texi (nvptx): Recommend nvptx-tools 2024-05-30 (was: Re: nvptx target: Global constructor, destructor support, via nvptx-tools 'ld')

2024-06-03 Thread Tobias Burnus

Thomas Schwinge wrote:

In the following, I have then reconsidered that stance; we may actually
"Implement global constructor, destructor support in a conceptually
simpler way than using 'collect2' (the program): implement the respective
functionality in the nvptx-tools 'ld'".  The latter is

"ld: Global constructor/destructor support".


The attached patch makes clearer which version should be
installed by recommending this patch (= latest nvptx-tools)
in install.texi.

OK? Comments, remarks?

Tobias

PS: If the https://github.com/SourceryTools/nvptx-tools/pull/47
(nvptx-ld.cc: Improve C++11 compatibility with older compilers)
proves worthwhile and gets merged, we should point to that commit
instead.

install.texi (nvptx): Recommend nvptx-tools 2024-05-30

gcc/
	* doc/install.texi (nvptx): Recommend nvptx-tools 2024-05-30 or newer.

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 42b462a2ce2..4859f6743ab 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -4698,7 +4698,8 @@ Andes NDS32 target in big endian mode.
 Nvidia PTX target.
 
 Instead of GNU binutils, you will need to install
-@uref{https://github.com/SourceryTools/nvptx-tools,,nvptx-tools}.
+@uref{https://github.com/SourceryTools/nvptx-tools,,nvptx-tools}
+(recommended: 96f8fc5 of 2024-05-30 -- or newer).
 Tell GCC where to find it:
 @option{--with-build-time-tools=[install-nvptx-tools]/nvptx-none/bin}.
 


Re: [PATCH v2 01/12] OpenMP: metadirective tree data structures and front-end interfaces

2024-05-31 Thread Tobias Burnus

Hi Sandra,

some observations/comments, but in general it looks good.

Sandra Loosemore wrote:

This patch adds the OMP_METADIRECTIVE tree node and shared tree-level
support for manipulating metadirectives.  It defines/exposes
interfaces that will be used in subsequent patches that add front-end
and middle-end support, but nothing generates these nodes yet.

This patch also adds compile-time support for dynamic context
selectors (the target_device selector set and the condition selector
of the user selector set) for metadirectives only.  The "declare
variant" directive still supports only static selectors.

...

  /* Return 1 if context selector matches the current OpenMP context, 0
 if it does not and -1 if it is unknown and need to be determined later.
 Some properties can be checked right away during parsing (this routine),
 others need to wait until the whole TU is parsed, others need to wait until
-   IPA, others until vectorization.  */
+   IPA, others until vectorization.
+
+   METADIRECTIVE_P is true if this is a metadirective context, and DELAY_P
+   is true if it's too early in compilation to determine whether some
+   properties match.
+
+   Dynamic properties (which are evaluated at run-time) should always
+   return 1.  */

I have to admit that I don't really see the use of metadirective_p as …

  int
-omp_context_selector_matches (tree ctx)
+omp_context_selector_matches (tree ctx, bool metadirective_p, bool delay_p)

...

+   if (metadirective_p && delay_p)
+ return -1;


I do see why the resolution of KIND/ARCH/ISA should be delayed – for 
both variant/metadirective as long as the code is run by the host and 
the device. Except that we could exclude, e.g., 'kind(FPGA)' early on as 
we don't support it at all.


But once the device code is split off, I don't see why we can't expand 
the DEVICE clause right away for both variant and metadirective – while 
for 'target_device', we cannot do much until runtime – except for 
excluding things like 'kind(fpga)' – or excluding all 'arch' known to 
be supported neither by the host nor by any enabled offload devices.


Thus, I see why there is a 'delay_p', but not why there is a 
'metadirective_p'.


But I might have missed something important ...


 case OMP_TRAIT_USER_CONDITION:
   if (set == OMP_TRAIT_SET_USER)
 for (tree p = OMP_TS_PROPERTIES (ts); p; p = TREE_CHAIN (p))
   if (OMP_TP_NAME (p) == NULL_TREE)
 {
+ /* OpenMP 5.1 allows non-constant conditions for
+metadirectives.  */
+ if (metadirective_p
+ && !tree_fits_shwi_p (OMP_TP_VALUE (p)))
+   break;
   if (integer_zerop (OMP_TP_VALUE (p)))
 return 0;
   if (integer_nonzerop (OMP_TP_VALUE (p)))
 break;
   ret = -1;
 }


(BTW: I am happy to be enlightened as I likely have missed some fine print.)

Regarding the comment: True, but shouldn't this be handled before by 
issuing an error when such a clause is used in 'declare variant', i.e. 
only occur when metadirective_p is/can be true?


Besides, I have to admit that I do not understand the new code. The 
current code has: constant zero → whole selector known to be false 
("return 0"); nonzero constant → keep current state, i.e. either 'true' 
(1) or don't know ('-1') and continue; otherwise (not const) → set to 
"don't know" (-1) and continue with the next item.


That seems to make also sense for metadirectives. But your patch changes 
this to keep current state if a variable. In that case, '1' is used if 
this is the only item or the previous condition is true. Or "-1" when 
the previous item is "don't know" (-1). - I think that doesn't make 
sense and it should always return -1 for a run time value.


Additionally, I wonder why you use tree_fits_shwi_p instead of a simple 
'TREE_CODE (OMP_TP_VALUE (p)) != INTEGER_CST'. It does not seem to 
matter here, but '(uint128_t)-1' looks like a valid condition and valid 
constant, which integer_nonzerop should handle, but if the hwi is 128bit 
wide, it won't fit into a signed variable.


(As integer_nonzerop and the current code both do "break;" it won't 
change the result of the current code.)


* * *

+static tree
+omp_dynamic_cond (tree ctx)
+{

...

+  /* The user condition is not dynamic if it is constant.  */
+  if (!tree_fits_shwi_p (TREE_VALUE (expr_list)))


Any reason for using tree_fits_shwi_p instead of INTEGER_CST? Here, 
(uint128_t)-1 could make a difference …



+   /* omp_initial_device is -1, omp_invalid_device is -4; choose
+  a value that isn't otherwise defined to indicate the default
+  device.  */
+   device_num = build_int_cst (integer_type_node, -2);


Don't do this - we do it differently 

[gcc r15-924] libgomp.texi: Impl. update for USM and missing 5.2 item

2024-05-30 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:370df6ef0fe6d99613050d33a18cc008be7ceca4

commit r15-924-g370df6ef0fe6d99613050d33a18cc008be7ceca4
Author: Tobias Burnus 
Date:   Thu May 30 13:21:43 2024 +0200

libgomp.texi: Impl. update for USM and missing 5.2 item

libgomp/ChangeLog:

* libgomp.texi (OpenMP 5.0 status): Mark 'requires' as done and
link to 'Offload-Target Specifics'.
(OpenMP 5.2 status): Add item about additional map-type modifiers
in 'declare mapper'.

Diff:
---
 libgomp/libgomp.texi | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index e79bd7a3392..d612488ad10 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -198,8 +198,8 @@ The OpenMP 4.5 specification is fully supported.
 @item @var{target-offload-var} ICV and @code{OMP_TARGET_OFFLOAD}
   env variable @tab Y @tab
 @item Nested-parallel changes to @var{max-active-levels-var} ICV @tab Y @tab
-@item @code{requires} directive @tab P
-  @tab complete but no non-host device provides 
@code{unified_shared_memory}
+@item @code{requires} directive @tab Y
+  @tab See also @ref{Offload-Target Specifics}
 @item @code{teams} construct outside an enclosing target region @tab Y @tab
 @item Non-rectangular loop nests @tab P
   @tab Full support for C/C++, partial for Fortran
@@ -443,6 +443,8 @@ to address of matching mapped list item per 5.1, Sect. 
2.21.7.2 @tab N @tab
   of the @code{interop} construct @tab N @tab
 @item Invoke virtual member functions of C++ objects created on the host device
   on other devices @tab N @tab
+@item @code{iterator} and @code{mapper} as map-type modifier in @code{declare 
mappter}
+  @tab N @tab
 @end multitable


[patch] libgomp.texi: Impl. update for USM and missing 5.2 item

2024-05-29 Thread Tobias Burnus
Now that unified-shared memory works (with some devices), mark it as 'Y' 
and link to the device-specific chapter. While there is always room for 
improvement (like having opt-in partial support for managed-memory 
semi-USM devices), it works sufficiently for a 'Y'.


Additionally, I saw that 5.2 now extended what is permitted inside 
'declare mapper'. Instead of listing the permitted clauses as in 5.1, 
it now refers to the 'map' clause such that 'delete'/'release', 
'present' and in particular 'iterator' and 'mapper' itself are permitted 
inside a declare-mapper 'map' clause. - Thus, I added it as to-do item 
to the 5.2 status.


Comments?

Tobias

PS: As this is also about USM, the declare-target USM issue I mentioned 
in several patch emails is now filed as https://gcc.gnu.org/PR115279

libgomp.texi: Impl. update for USM and missing 5.2 item

libgomp/ChangeLog:

	* libgomp.texi (OpenMP 5.0 status): Mark 'requires' as done and
	link to 'Offload-Target Specifics'.
	(OpenMP 5.2 status): Add item about additional map-type modifiers
	in 'declare mapper'.

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index e79bd7a3392..03e6455219d 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -198,8 +198,8 @@ The OpenMP 4.5 specification is fully supported.
 @item @var{target-offload-var} ICV and @code{OMP_TARGET_OFFLOAD}
   env variable @tab Y @tab
 @item Nested-parallel changes to @var{max-active-levels-var} ICV @tab Y @tab
-@item @code{requires} directive @tab P
-  @tab complete but no non-host device provides @code{unified_shared_memory}
+@item @code{requires} directive @tab Y
+  @tab See @ref{Offload-Target Specifics}
 @item @code{teams} construct outside an enclosing target region @tab Y @tab
 @item Non-rectangular loop nests @tab P
   @tab Full support for C/C++, partial for Fortran
@@ -443,6 +443,8 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab
   of the @code{interop} construct @tab N @tab
 @item Invoke virtual member functions of C++ objects created on the host device
   on other devices @tab N @tab
+@item @code{iterator} and @code{mapper} as map-type modifier in @code{declare mappter}
+  @tab N @tab
 @end multitable
 
 


[gcc r15-899] libgomp: Enable USM for AMD APUs and MI200 devices

2024-05-29 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:18f477980c8597fe3dca2c2e8bd533c0c2b17aa6

commit r15-899-g18f477980c8597fe3dca2c2e8bd533c0c2b17aa6
Author: Tobias Burnus 
Date:   Wed May 29 15:29:06 2024 +0200

libgomp: Enable USM for AMD APUs and MI200 devices

If HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT is true,
all GPUs on the system support unified shared memory. That's
the case for APUs and MI200 devices when XNACK is enabled.

XNACK can be enabled by setting HSA_XNACK=1 as env var for
    supported devices; otherwise, if disabled, USM code will
use host fallback.

gcc/ChangeLog:

* config/gcn/gcn-hsa.h (gcn_local_sym_hash): Fix typo.

include/ChangeLog:

* hsa.h (HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT): Add
enum value.

libgomp/ChangeLog:

* libgomp.texi (gcn): Update USM handling
* plugin/plugin-gcn.c (GOMP_OFFLOAD_get_num_devices): Handle
USM if HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT is true.

Diff:
---
 gcc/config/gcn/gcn-hsa.h|  2 +-
 include/hsa.h   |  4 +++-
 libgomp/libgomp.texi|  9 +++--
 libgomp/plugin/plugin-gcn.c | 17 +
 4 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
index 4611bc55392..03220555075 100644
--- a/gcc/config/gcn/gcn-hsa.h
+++ b/gcc/config/gcn/gcn-hsa.h
@@ -80,7 +80,7 @@ extern unsigned int gcn_local_sym_hash (const char *name);
writes a new AMD GPU object file and the ABI version needs to be the
same. - LLVM <= 17 defaults to 4 while LLVM >= 18 defaults to 5.
GCC supports LLVM >= 13.0.1 and only LLVM >= 14 supports version 5.
-   Note that Fiji is only suppored with LLVM <= 17 as version 3 is no longer
+   Note that Fiji is only supported with LLVM <= 17 as version 3 is no longer
supported in LLVM >= 18.  */
 #define ABI_VERSION_SPEC "march=fiji:--amdhsa-code-object-version=3;" \
 "!march=*|march=*:--amdhsa-code-object-version=4"
diff --git a/include/hsa.h b/include/hsa.h
index f9b5d9daf85..3c7be95d7fd 100644
--- a/include/hsa.h
+++ b/include/hsa.h
@@ -466,7 +466,9 @@ typedef enum {
   /**
   * String containing the ROCr build identifier.
   */
-  HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200
+  HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200,
+
+  HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT = 0x202
 } hsa_system_info_t;
 
 /**
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 22868635230..e79bd7a3392 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -6360,8 +6360,13 @@ The implementation remark:
   such that the next reverse offload region is only executed after the 
previous
   one returned.
 @item OpenMP code that has a @code{requires} directive with
-  @code{unified_shared_memory} will remove any GCN device from the list of
-  available devices (``host fallback'').
+  @code{unified_shared_memory} is only supported if all AMD GPUs have the
+  @code{HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT} property; for
+  discrete GPUs, this may require setting the @code{HSA_XNACK} environment
+  variable to @samp{1}; for systems with both an APU and a discrete GPU 
that
+  does not support XNACK, consider using @code{ROCR_VISIBLE_DEVICES} to
+  enable only the APU.  If not supported, all AMD GPU devices are removed
+  from the list of available devices (``host fallback'').
 @item The available stack size can be changed using the @code{GCN_STACK_SIZE}
   environment variable; the default is 32 kiB per thread.
 @item Low-latency memory (@code{omp_low_lat_mem_space}) is supported when the
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 3cdc7ba929f..3d882b5ab63 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -3355,8 +3355,25 @@ GOMP_OFFLOAD_get_num_devices (unsigned int 
omp_requires_mask)
   if (hsa_context.agent_count > 0
   && ((omp_requires_mask
   & ~(GOMP_REQUIRES_UNIFIED_ADDRESS
+  | GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
   | GOMP_REQUIRES_REVERSE_OFFLOAD)) != 0))
 return -1;
+  /* Check whether host page access is supported; this is per system level
+ (all GPUs supported by HSA).  While intrinsically true for APUs, it
+ requires XNACK support for discrete GPUs.  */
+  if (hsa_context.agent_count > 0
+  && (omp_requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY))
+{
+  bool b;
+  hsa_system_info_t type = HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT;
+  hsa_status_t status = hsa_fns.hsa_system_get_info_fn (type, &b);
+  if (status != HSA_STATUS_SUCCESS)
+   GOMP_PLUGIN_error ("HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT "
+  "failed");
+  if (!b)
+   return -1;
+}
+
   return hsa_context.agent_count;
 }


[gcc r15-898] libgomp: Enable USM for some nvptx devices

2024-05-29 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:4ccb3366ade6ec9493f8ca20ab73b0da4b9816db

commit r15-898-g4ccb3366ade6ec9493f8ca20ab73b0da4b9816db
Author: Tobias Burnus 
Date:   Wed May 29 15:14:38 2024 +0200

libgomp: Enable USM for some nvptx devices

A few high-end nvptx devices support the attribute
CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS; for those, unified shared
memory is supported in hardware. This patch enables support for those -
if all installed nvptx devices have this feature (as the capabilities
are per device type).

This exposes a bug in gomp_copy_back_icvs as it did before use
omp_get_mapped_ptr to find mapped variables, but that returns
the unchanged pointer in case of shared memory. But in this case,
we have a few actually mapped pointers - like the ICV variables.
Additionally, there was a mismatch with regards to '-1' for the
device number as gomp_copy_back_icvs and omp_get_mapped_ptr count
differently. Hence, do the lookup manually.

include/ChangeLog:

* cuda/cuda.h (CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS): Add.

libgomp/ChangeLog:

* libgomp.texi (nvptx): Update USM description.
* plugin/plugin-nvptx.c (GOMP_OFFLOAD_get_num_devices):
Claim support when requesting USM and all devices support
CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS.
* target.c (gomp_copy_back_icvs): Fix device ptr lookup.
(gomp_target_init): Set GOMP_OFFLOAD_CAP_SHARED_MEM is the
devices supports USM.

Diff:
---
 include/cuda/cuda.h   |  3 ++-
 libgomp/libgomp.texi  |  7 +--
 libgomp/plugin/plugin-nvptx.c | 15 +++
 libgomp/target.c  | 24 +++-
 4 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/include/cuda/cuda.h b/include/cuda/cuda.h
index 0dca4b3a5c0..804d08ca57e 100644
--- a/include/cuda/cuda.h
+++ b/include/cuda/cuda.h
@@ -83,7 +83,8 @@ typedef enum {
   CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
   CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
   CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
-  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82
+  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
+  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88
 } CUdevice_attribute;
 
 enum {
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 71d62105a20..22868635230 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -6435,8 +6435,11 @@ The implementation remark:
   the next reverse offload region is only executed after the previous
   one returned.
 @item OpenMP code that has a @code{requires} directive with
-  @code{unified_shared_memory} will remove any nvptx device from the
-  list of available devices (``host fallback'').
+  @code{unified_shared_memory} runs on nvptx devices if and only if
+  all of those support the @code{pageableMemoryAccess} property;@footnote{
+  
@uref{https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#um-requirements}}
+  otherwise, all nvptx device are removed from the list of available
+  devices (``host fallback'').
 @item The default per-warp stack size is 128 kiB; see also @code{-msoft-stack}
   in the GCC manual.
 @item The OpenMP routines @code{omp_target_memcpy_rect} and
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 5aad3448a8d..4cedc5390a3 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1201,8 +1201,23 @@ GOMP_OFFLOAD_get_num_devices (unsigned int 
omp_requires_mask)
   if (num_devices > 0
   && ((omp_requires_mask
   & ~(GOMP_REQUIRES_UNIFIED_ADDRESS
+  | GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
   | GOMP_REQUIRES_REVERSE_OFFLOAD)) != 0))
 return -1;
+  /* Check whether host page access (direct or via migration) is supported;
+ if so, enable USM.  Currently, capabilities is per device type, hence,
+ check all devices.  */
+  if (num_devices > 0
+  && (omp_requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY))
+for (int dev = 0; dev < num_devices; dev++)
+  {
+   int pi;
+   CUresult r;
+   r = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &pi,
+  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS, dev);
+   if (r != CUDA_SUCCESS || pi == 0)
+ return -1;
+  }
   return num_devices;
 }
 
diff --git a/libgomp/target.c b/libgomp/target.c
index 5ec19ae489e..48689920d4a 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -2969,8 +2969,25 @@ gomp_copy_back_icvs (struct gomp_device_descr *devicep, 
int device)
   if (item == NULL)
 return;
 
+  gomp_mutex_lock (&devicep->lock);
+
+  struct splay_tree_s *mem_map = &devicep->mem_map;
+  struct splay_tree_key_s cur_node;
+  void *dev_ptr = NULL;
+
   void *host_ptr = &item->icvs;
-  void *dev_ptr = omp_get_mapped_ptr 

[patch] libgomp: Enable USM for AMD APUs and MI200 devices

2024-05-29 Thread Tobias Burnus

This patch depends (for the libgomp/target.c parts) on the patch
"[patch] libgomp: Enable USM for some nvptx devices",
https://gcc.gnu.org/pipermail/gcc-patches/2024-May/652987.html

AMD GPUs that are either APU devices or MI200 [or MI300X]
(with HSA_XNACK=1 set) can access host memory; the run-time library
returns in that case HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT = true.

Thus, it makes sense to enable USM support for those devices, which
this patch does. — A simple test with all unified_shared_memory tests
shipping with sollve_vv now works:*

  Test passed on the device.

as tested on an MI200 series device. In line with (some) other compilers,
it requires that HSA_XNACK=1 is set, otherwise the code will be executed
on the host.

(* Well, for C++, -O2 -fno-exception was used, but still only 5 test
cases PASS; there is 1 delete[] etc. link error, 1 ICE (segfault during
IPA pass: cp in gcn gcc) and 1 runtime fail for
tests/5.2/unified_shared_mem/test_target_struct_obj_access.cpp [**]; but
all 15 Fortran and 16 C tests PASS.)


Comments, remarks, suggestions?
Any reason not to commit it to mainline?

Tobias

PS: Richard confirmed that his gfx1036 APU also has
HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT == true; at least when
he disables the discrete gfx1030, which neither supports xnack nor
is an APU.

** rocgdb shows:

Thread 4 "a.out" received signal SIGSEGV, Segmentation fault.
[Switching to thread 4, lane 0 (AMDGPU Lane 1:1:1:1/0 (0,0,0)[0,0,0])]
0x77309c30 in main._omp_fn () at 
tests/5.2/unified_shared_mem/test_target_struct_obj_access.cpp:88
88if (Emp.name[i] != RefStr[i]) {

but I have not tried to debug this.
libgomp: Enable USM for AMD APUs and MI200 devices

If HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT is true,
all GPUs on the system support unified shared memory. That's
the case for APUs and MI200 devices when XNACK is enabled.

XNACK can be enabled by setting HSA_XNACK=1 as env var for
supported devices; otherwise, if disabled, USM code will
use host fallback.

gcc/ChangeLog:

	* config/gcn/gcn-hsa.h (gcn_local_sym_hash): Fix typo.

include/ChangeLog:

	* hsa.h (HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT): Add
	enum value.

libgomp/ChangeLog:

	* libgomp.texi (gcn): Update USM handling
	* plugin/plugin-gcn.c (GOMP_OFFLOAD_get_num_devices): Handle
	USM if HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT is true.

 gcc/config/gcn/gcn-hsa.h|  2 +-
 include/hsa.h   |  4 +++-
 libgomp/libgomp.texi|  9 +++--
 libgomp/plugin/plugin-gcn.c | 18 ++
 4 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
index 4611bc55392..03220555075 100644
--- a/gcc/config/gcn/gcn-hsa.h
+++ b/gcc/config/gcn/gcn-hsa.h
@@ -80,7 +80,7 @@ extern unsigned int gcn_local_sym_hash (const char *name);
writes a new AMD GPU object file and the ABI version needs to be the
same. - LLVM <= 17 defaults to 4 while LLVM >= 18 defaults to 5.
GCC supports LLVM >= 13.0.1 and only LLVM >= 14 supports version 5.
-   Note that Fiji is only suppored with LLVM <= 17 as version 3 is no longer
+   Note that Fiji is only supported with LLVM <= 17 as version 3 is no longer
supported in LLVM >= 18.  */
 #define ABI_VERSION_SPEC "march=fiji:--amdhsa-code-object-version=3;" \
 			 "!march=*|march=*:--amdhsa-code-object-version=4"
diff --git a/include/hsa.h b/include/hsa.h
index f9b5d9daf85..3c7be95d7fd 100644
--- a/include/hsa.h
+++ b/include/hsa.h
@@ -466,7 +466,9 @@ typedef enum {
   /**
   * String containing the ROCr build identifier.
   */
-  HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200
+  HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200,
+
+  HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT = 0x202
 } hsa_system_info_t;
 
 /**
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 22868635230..e79bd7a3392 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -6360,8 +6360,13 @@ The implementation remark:
   such that the next reverse offload region is only executed after the previous
   one returned.
 @item OpenMP code that has a @code{requires} directive with
-  @code{unified_shared_memory} will remove any GCN device from the list of
-  available devices (``host fallback'').
+  @code{unified_shared_memory} is only supported if all AMD GPUs have the
+  @code{HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT} property; for
+  discrete GPUs, this may require setting the @code{HSA_XNACK} environment
+  variable to @samp{1}; for systems with both an APU and a discrete GPU that
+  does not support XNACK, consider using @code{ROCR_VISIBLE_DEVICES} to
+  enable only the APU.  If not supported, all AMD GPU devices are removed
+  from the list of available devices (``host fallback'').
 @item The available stack size can be changed using the @code{GCN_STACK_SIZE}
   environment variable; the default is 32 kiB per thread.
 @item Low-latency memory 

Re: [patch] OpenMP: Add -fopenmp-force-usm mode

2024-05-29 Thread Tobias Burnus

Jakub Jelinek wrote:

I mean, if we want to add something, maybe better would an -include like
option that instead of including a file includes it directly.
gcc --include-inline '#pragma omp requires unified_shared_memory' ...


Likewise for Fortran, but there the question is whether it should be in 
the use-stmt, import-stmt, implicit-part or declaration-part; I guess 
having one --include-inline-use-stmt and --include-inline-declaration 
would make sense …


And, I guess, multiple flags should be permitted, which can then be 
processed as separate lines.


Tobias


Re: [patch] OpenMP: Add -fopenmp-force-usm mode

2024-05-29 Thread Tobias Burnus

Jakub Jelinek wrote:

How is that option different from
echo '#pragma omp requires unified_shared_memory' > omp-usm.h
gcc -include omp-usm.h
?
I mean with -include you can add anything you want, not just one particular
directive, and adding a separate option for each is just weird.


For C/C++, -include seems to be indeed sufficient (albeit not widely 
known). For Fortran, there are two issues: One placement/semantic issue: 
it has to be added per "compilation unit", i.e. to the specification 
part of a module, subprogram or main program. And a practical issue, 
gfortran shows:


error: command-line option '-include !$omp requires' is valid for 
C/C++/ObjC/ObjC++ but not for Fortran


Thus, for Fortran it is still intrinsically useful – even if one can 
argue whether that feature is needed at all / whether it should be added 
as command-line argument.


Tobias


Re: [patch] libgomp: Enable USM for some nvptx devices

2024-05-29 Thread Tobias Burnus

Tobias Burnus wrote:
While most of the nvptx systems I have access to don't have the 
support for 
CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, one 
has:


Actually, CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS is sufficient. And 
I finally also found the proper webpage for this feature; I couldn't 
find it as Nvidia's documentation uses pageableMemoryAccess and not 
CU_... for that feature. The updated patch is attached.


For details: 
https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#um-requirements


In principle, this proper USM is supported by Grace Hopper, PowerPC9 + 
Volta (sm_70) – but for some reasons, our PPC/Volta system does not 
support it. It is also said to work with Turing (sm_75) and newer when 
using Linux Kernel's HMM and the Open Kernel Modules (newer CUDA have 
this but don't use them by default). See link above.


I am not quite sure whether there are unintended side effects, hence, 
I have not enabled support for it in general. In particular, 'declare 
target enter(global_var)' seems to be mishandled (I think it should be 
link + pointer updated to point to the host; cf. description for 
'self_maps'). Thus, it is not enabled by default but only when USM has 
been requested.

OK for mainline?
Comments? Remarks? Suggestions?

Tobias

PS: I guess some more USM tests should be added…
libgomp: Enable USM for some nvptx devices

A few high-end nvptx devices support the attribute
CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS; for those, unified shared
memory is supported in hardware. This patch enables support for those -
if all installed nvptx devices have this feature (as the capabilities
are per device type).

This exposes a bug in gomp_copy_back_icvs as it did before use
omp_get_mapped_ptr to find mapped variables, but that returns
the unchanged pointer in case of shared memory. But in this case,
we have a few actually mapped pointers - like the ICV variables.
Additionally, there was a mismatch with regards to '-1' for the
device number as gomp_copy_back_icvs and omp_get_mapped_ptr count
differently. Hence, do the lookup manually.

include/ChangeLog:

	* cuda/cuda.h (CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS): Add.

libgomp/ChangeLog:

	* libgomp.texi (nvptx): Update USM description.
	* plugin/plugin-nvptx.c (GOMP_OFFLOAD_get_num_devices):
	Claim support when requesting USM and all devices support 
	CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS.
	* target.c (gomp_copy_back_icvs): Fix device ptr lookup.
	(gomp_target_init): Set GOMP_OFFLOAD_CAP_SHARED_MEM is the
	devices supports USM.

 include/cuda/cuda.h   |  3 ++-
 libgomp/libgomp.texi  |  7 +--
 libgomp/plugin/plugin-nvptx.c | 16 
 libgomp/target.c  | 24 +++-
 4 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/include/cuda/cuda.h b/include/cuda/cuda.h
index 0dca4b3a5c0..804d08ca57e 100644
--- a/include/cuda/cuda.h
+++ b/include/cuda/cuda.h
@@ -83,7 +83,8 @@ typedef enum {
   CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
   CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
   CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
-  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82
+  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
+  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88
 } CUdevice_attribute;
 
 enum {
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 71d62105a20..ba534b6b3c4 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -6435,8 +6435,11 @@ The implementation remark:
   the next reverse offload region is only executed after the previous
   one returned.
 @item OpenMP code that has a @code{requires} directive with
-  @code{unified_shared_memory} will remove any nvptx device from the
-  list of available devices (``host fallback'').
+  @code{unified_shared_memory} will run on nvptx devices if and only if
+  all of those support the @code{pageableMemoryAccess} property;@footnote{
+  @uref{https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#um-requirements}}
+  otherwise, all nvptx device are removed from the list of available
+  devices (``host fallback'').
 @item The default per-warp stack size is 128 kiB; see also @code{-msoft-stack}
   in the GCC manual.
 @item The OpenMP routines @code{omp_target_memcpy_rect} and
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 5aad3448a8d..d3764185d4b 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1201,8 +1201,24 @@ GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask)
   if (num_devices > 0
   && ((omp_requires_mask
 	   & ~(GOMP_REQUIRES_UNIFIED_ADDRESS
+	   | GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
 	   | GOMP_REQUIRES_REVERSE_OFFLOAD)) != 0))
 return -1;
+  /* Check whether host page access (direct or via migration) is supported;
+ if so, enable USM.  Currently, capa

[patch] libgomp: Enable USM for some nvptx devices

2024-05-28 Thread Tobias Burnus
While most of the nvptx systems I have access to don't have the support 
for CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, 
one has:


Tesla V100-SXM2-16GB (as installed, e.g., on ORNL's Summit) does support 
this feature. And with that feature, unified-shared memory support does 
work, presumably by handling automatic page migration when a page fault 
occurs.


Hence: Enable USM support for those. When doing so, all 'requires 
unified_shared_memory' tests of sollve_vv pass :-)


I am not quite sure whether there are unintended side effects, hence, I 
have not enabled support for it in general. In particular, 'declare 
target enter(global_var)' seems to be mishandled (I think it should be 
link + pointer updated to point to the host; cf. description for 
'self_maps'). Thus, it is not enabled by default but only when USM has 
been requested.


OK for mainline?
Comments? Remarks? Suggestions?

Tobias

PS: I guess some more USM tests should be added…

libgomp: Enable USM for some nvptx devices

A few high-end nvptx devices support the attribute
CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES;
for those, unified shared memory is supported in hardware. This
patch enables support for those - if all installed nvptx devices
have this feature (as the capabilities are per device type).

This exposes a bug in gomp_copy_back_icvs as it did before use
omp_get_mapped_ptr to find mapped variables, but that returns
the unchanged pointer in case of shared memory. But in this case,
we have a few actually mapped pointers - like the ICV variables.
Additionally, there was a mismatch with regards to '-1' for the
device number as gomp_copy_back_icvs and omp_get_mapped_ptr count
differently. Hence, do the lookup manually.

include/ChangeLog:

	* cuda/cuda.h
	(CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES):
	Add.

libgomp/ChangeLog:

	* libgomp.texi (nvptx): Update USM description.
	* plugin/plugin-nvptx.c (GOMP_OFFLOAD_get_num_devices):
	Claim support when requesting USM and all devices support 
	CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES.
	* target.c (gomp_copy_back_icvs): Fix device ptr lookup.
	(gomp_target_init): Set GOMP_OFFLOAD_CAP_SHARED_MEM is the
	devices supports USM.

 include/cuda/cuda.h   |  3 ++-
 libgomp/libgomp.texi  |  5 -
 libgomp/plugin/plugin-nvptx.c | 15 +++
 libgomp/target.c  | 24 +++-
 4 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/include/cuda/cuda.h b/include/cuda/cuda.h
index 0dca4b3a5c0..db640d20366 100644
--- a/include/cuda/cuda.h
+++ b/include/cuda/cuda.h
@@ -83,7 +83,8 @@ typedef enum {
   CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
   CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
   CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
-  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82
+  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
+  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100
 } CUdevice_attribute;
 
 enum {
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 71d62105a20..e0d37f67983 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -6435,7 +6435,10 @@ The implementation remark:
   the next reverse offload region is only executed after the previous
   one returned.
 @item OpenMP code that has a @code{requires} directive with
-  @code{unified_shared_memory} will remove any nvptx device from the
+  @code{unified_shared_memory} will run on nvptx devices if and only if
+  all of those support the
+  @code{CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES}
+  attribute; otherwise, all nvptx devices are removed from the
   list of available devices (``host fallback'').
 @item The default per-warp stack size is 128 kiB; see also @code{-msoft-stack}
   in the GCC manual.
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 5aad3448a8d..c4b0f5dd4bf 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1201,8 +1201,23 @@ GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask)
   if (num_devices > 0
   && ((omp_requires_mask
 	   & ~(GOMP_REQUIRES_UNIFIED_ADDRESS
+	   | GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
 	   | GOMP_REQUIRES_REVERSE_OFFLOAD)) != 0))
 return -1;
+  /* Check whether automatic page migration is supported; if so, enable USM.
+ Currently, capabilities is per device type, hence, check all devices.  */
+  if (num_devices > 0
+  && (omp_requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY))
+for (int dev = 0; dev < num_devices; dev++)
+  {
+	int pi;
+	CUresult r;
+	r = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &pi,
+	  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES,
+	  dev);
+	if (r != CUDA_SUCCESS || pi == 0)
+	  return -1;
+  }
   return num_devices;
 }
 
diff --git a/libgomp/target.c 

[patch] OpenMP: Add -fopenmp-force-usm mode

2024-05-28 Thread Tobias Burnus
-fopenmp-force-usm can be useful for some badly written code. Explicitly 
using 'omp requires' makes more sense but still. It might also make 
sense for testing purposes.


Unfortunately, I did not see a simple way of testing it. When trying it 
manually, I looked at the 'a.xamdgcn-amdhsa.c' -save-temps file, where 
gcn_data has the omp_requires_mask as second argument and testing showed 
that an explicit pragma and the -f... argument have the same result.


An alternative would be to move this code later, e.g. to lto-cgraph.cc's 
omp_requires_mask, which might be safer (as it avoids changing as many 
locations). On the other hand, it might require more special cases 
elsewhere.*


Comments, suggestions?

Tobias

*I am especially thinking about a global variable and "#pragma omp 
declare target". At least with 'omp requires self_maps' of OpenMP 6, it 
seems as if 'declare target enter(global_var)' should become 
'link(global_var)' where the global_var pointer is updated to point to 
the host version.


At least I don't see how otherwise the "all corresponding list items 
created by the 'enter' clauses specified by declare target directives in 
the compilation unit share storage with the original list items." could 
be fulfilled.


This will require generating different code for 'self_maps' (and, 
potentially / [RFC] 'unified_shared_memory') than normal code, which 
would be the first compiler code-gen change due to USM (→ 
GOMP_OFFLOAD_CAP_SHARED_MEM) for non-host devices.
OpenMP: Add -fopenmp-force-usm mode

Add an implicit 'omp requires unified_shared_memory' to all files that
use target constructs ("OMP_REQUIRES_TARGET_USED").  As constructed, the
diagnostic "'unified_shared_memory' clause used lexically after first target
construct or offloading API" is not inhibited.

The option has no effect without -fopenmp and does not affect OpenACC code,
matching what the directive would do.  The name of the command-line option
matches Clang's, added in LLVM 18.

gcc/c-family/ChangeLog:

	* c.opt (fopenmp-force-usm): New.
	* c.opt.urls: Regenerated

gcc/c/ChangeLog:

	* c-parser.cc (c_parser_omp_target_data, c_parser_omp_target_update,
	c_parser_omp_target_enter_data, c_parser_omp_target_exit_data,
	c_parser_omp_target): When setting OMP_REQUIRES_TARGET_USED, also
	set OMP_REQUIRES_UNIFIED_SHARED_MEMORY if -fopenmp-force-usm is
	in force.

gcc/cp/ChangeLog:

	* parser.cc (cp_parser_omp_target_data,
	cp_parser_omp_target_enter_data, cp_parser_omp_target_exit_data,
	cp_parser_omp_target_update, cp_parser_omp_target): When setting
	OMP_REQUIRES_TARGET_USED, also set OMP_REQUIRES_UNIFIED_SHARED_MEMORY
	if -fopenmp-force-usm is in force.


gcc/ChangeLog:

	* doc/invoke.texi (-fopenmp-force-usm): Document new option.

gcc/fortran/ChangeLog:

	* invoke.texi (-fopenmp-force-usm): Document new option.
	* lang.opt (fopenmp-force-usm): New.
	* lang.opt.urls: Regenerate.
	* parse.cc (gfc_parse_file): When setting
	OMP_REQUIRES_TARGET_USED, also set OMP_REQUIRES_UNIFIED_SHARED_MEMORY
	if -fopenmp-force-usm is in force.

 gcc/c-family/c.opt|  4 
 gcc/c-family/c.opt.urls   |  3 +++
 gcc/c/c-parser.cc | 50 +--
 gcc/cp/parser.cc  | 50 +--
 gcc/doc/invoke.texi   | 11 +--
 gcc/fortran/invoke.texi   |  7 +++
 gcc/fortran/lang.opt  |  4 
 gcc/fortran/lang.opt.urls |  3 +++
 gcc/fortran/parse.cc  | 10 --
 9 files changed, 118 insertions(+), 24 deletions(-)

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index fb34c3b7031..4985cd61c48 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -2136,6 +2136,10 @@ fopenmp
 C ObjC C++ ObjC++ LTO Var(flag_openmp)
 Enable OpenMP (implies -frecursive in Fortran).
 
+fopenmp-force-usm
+C ObjC C++ ObjC++ Var(flag_openmp_force_usm)
+Behave as if the source file contained OpenMP's 'requires unified_shared_memory'.
+
 fopenmp-simd
 C ObjC C++ ObjC++ Var(flag_openmp_simd)
 Enable OpenMP's SIMD directives.
diff --git a/gcc/c-family/c.opt.urls b/gcc/c-family/c.opt.urls
index dd455d7c0dc..34b3a395e84 100644
--- a/gcc/c-family/c.opt.urls
+++ b/gcc/c-family/c.opt.urls
@@ -1222,6 +1222,9 @@ UrlSuffix(gcc/C-Dialect-Options.html#index-fopenacc-dim)
 fopenmp
 UrlSuffix(gcc/C-Dialect-Options.html#index-fopenmp) LangUrlSuffix_Fortran(gfortran/Fortran-Dialect-Options.html#index-fopenmp)
 
+fopenmp-force-usm
+UrlSuffix(gcc/C-Dialect-Options.html#index-fopenmp-force-usm) LangUrlSuffix_Fortran(gfortran/Fortran-Dialect-Options.html#index-fopenmp-force-usm)
+
 fopenmp-simd
 UrlSuffix(gcc/C-Dialect-Options.html#index-fopenmp-simd) LangUrlSuffix_Fortran(gfortran/Fortran-Dialect-Options.html#index-fopenmp-simd)
 
diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 00f8bf4376e..93c9cd1c9d0 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -23849,8 +23849,14 @@ static tree
 c_parser_omp_target_data (location_t loc, c_parser *parser, bool *if_p)
 {
   if 

[gcc r15-867] testsuite/*/gomp: Remove 'dg-prune-output "not supported yet"'

2024-05-28 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:c0d78289fcd9c04110907f8cad90d7e1e5c55a44

commit r15-867-gc0d78289fcd9c04110907f8cad90d7e1e5c55a44
Author: Tobias Burnus 
Date:   Tue May 28 19:52:44 2024 +0200

testsuite/*/gomp: Remove 'dg-prune-output "not supported yet"'

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/lastprivate-conditional-1.c: Remove
'{ dg-prune-output "not supported yet" }'.
* c-c++-common/gomp/requires-1.c: Likewise.
* c-c++-common/gomp/requires-2.c: Likewise.
* c-c++-common/gomp/reverse-offload-1.c: Likewise.
* g++.dg/gomp/requires-1.C: Likewise.
* gfortran.dg/gomp/requires-1.f90: Likewise.
* gfortran.dg/gomp/requires-2.f90: Likewise.
* gfortran.dg/gomp/requires-4.f90: Likewise.
* gfortran.dg/gomp/requires-5.f90: Likewise.
* gfortran.dg/gomp/requires-6.f90: Likewise.
* gfortran.dg/gomp/requires-7.f90: Likewise.

Diff:
---
 gcc/testsuite/c-c++-common/gomp/lastprivate-conditional-1.c | 2 --
 gcc/testsuite/c-c++-common/gomp/requires-1.c| 2 --
 gcc/testsuite/c-c++-common/gomp/requires-2.c| 2 --
 gcc/testsuite/c-c++-common/gomp/reverse-offload-1.c | 2 --
 gcc/testsuite/g++.dg/gomp/requires-1.C  | 2 --
 gcc/testsuite/gfortran.dg/gomp/requires-1.f90   | 2 --
 gcc/testsuite/gfortran.dg/gomp/requires-2.f90   | 2 --
 gcc/testsuite/gfortran.dg/gomp/requires-4.f90   | 1 -
 gcc/testsuite/gfortran.dg/gomp/requires-5.f90   | 2 --
 gcc/testsuite/gfortran.dg/gomp/requires-6.f90   | 2 --
 gcc/testsuite/gfortran.dg/gomp/requires-7.f90   | 1 -
 11 files changed, 20 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/gomp/lastprivate-conditional-1.c 
b/gcc/testsuite/c-c++-common/gomp/lastprivate-conditional-1.c
index 722aba79a52..d4ef49690e8 100644
--- a/gcc/testsuite/c-c++-common/gomp/lastprivate-conditional-1.c
+++ b/gcc/testsuite/c-c++-common/gomp/lastprivate-conditional-1.c
@@ -60,5 +60,3 @@ bar (int *p)
s = u;
   }
 }
-
-/* { dg-prune-output "not supported yet" } */
diff --git a/gcc/testsuite/c-c++-common/gomp/requires-1.c 
b/gcc/testsuite/c-c++-common/gomp/requires-1.c
index e1f2e3a503f..a47ec659566 100644
--- a/gcc/testsuite/c-c++-common/gomp/requires-1.c
+++ b/gcc/testsuite/c-c++-common/gomp/requires-1.c
@@ -10,5 +10,3 @@ foo ()
 
 #pragma omp requires unified_shared_memory unified_address
 #pragma omp requires atomic_default_mem_order(seq_cst)
-
-/* { dg-prune-output "not supported yet" } */
diff --git a/gcc/testsuite/c-c++-common/gomp/requires-2.c 
b/gcc/testsuite/c-c++-common/gomp/requires-2.c
index 717b65caeea..d7430b1b1a4 100644
--- a/gcc/testsuite/c-c++-common/gomp/requires-2.c
+++ b/gcc/testsuite/c-c++-common/gomp/requires-2.c
@@ -6,5 +6,3 @@
 #pragma omp requires dynamic_allocators , dynamic_allocators   /* { dg-error 
"too many 'dynamic_allocators' clauses" } */
 #pragma omp requires atomic_default_mem_order(seq_cst) 
atomic_default_mem_order(seq_cst)   /* { dg-error "too many 
'atomic_default_mem_order' clauses" } */
 #pragma omp requires atomic_default_mem_order (seq_cst)/* { dg-error 
"more than one 'atomic_default_mem_order' clause in a single compilation unit" 
} */
-
-/* { dg-prune-output "not supported yet" } */
diff --git a/gcc/testsuite/c-c++-common/gomp/reverse-offload-1.c 
b/gcc/testsuite/c-c++-common/gomp/reverse-offload-1.c
index 9a3fa5230f8..ddc3c2c6be1 100644
--- a/gcc/testsuite/c-c++-common/gomp/reverse-offload-1.c
+++ b/gcc/testsuite/c-c++-common/gomp/reverse-offload-1.c
@@ -6,8 +6,6 @@
 /* { dg-final { scan-tree-dump-times "__attribute__\\(\\(omp declare 
target\\)\\)\[\n\r\]*int called_in_target2" 1 "omplower" } }  */
 /* { dg-final { scan-tree-dump-times "__attribute__\\(\\(omp declare target, 
omp declare target block\\)\\)\[\n\r\]*void tg_fn" 1 "omplower" } }  */
 
-/* { dg-prune-output "'reverse_offload' clause on 'requires' directive not 
supported yet" } */
-
 #pragma omp requires reverse_offload
 
 extern int add_3 (int);
diff --git a/gcc/testsuite/g++.dg/gomp/requires-1.C 
b/gcc/testsuite/g++.dg/gomp/requires-1.C
index aefeb288dad..5ca5e006da1 100644
--- a/gcc/testsuite/g++.dg/gomp/requires-1.C
+++ b/gcc/testsuite/g++.dg/gomp/requires-1.C
@@ -8,5 +8,3 @@ namespace M {
 #pragma omp requires atomic_default_mem_order(seq_cst)
 }
 }
-
-/* { dg-prune-output "not supported yet" } */
diff --git a/gcc/testsuite/gfortran.dg/gomp/requires-1.f90 
b/gcc/testsuite/gfortran.dg/gomp/requires-1.f90
index b115a654e71..19007834c45 100644
--- a/gcc/testsuite/gfortran.dg/gomp/requires-1.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/requires-1.f90
@@ -9,5 +9,3 @@ subroutine bar
 !$omp requires unified_shared_memory unified_address
 !$omp requires atomic_de

[Patch] testsuite/*/gomp: Remove 'dg-prune-output "not supported yet"'

2024-05-28 Thread Tobias Burnus
Improve test coverage by removing 'prune-output' given that the features 
have been implemented in the meantime.


Comments, suggestions? Otherwise I will commit the patch as obvious.

Tobias
testsuite/*/gomp: Remove 'dg-prune-output "not supported yet"'

gcc/testsuite/ChangeLog:

	* c-c++-common/gomp/lastprivate-conditional-1.c: Remove
	'{ dg-prune-output "not supported yet" }'.
	* c-c++-common/gomp/requires-1.c: Likewise.
	* c-c++-common/gomp/requires-2.c: Likewise.
	* c-c++-common/gomp/reverse-offload-1.c: Likewise.
	* g++.dg/gomp/requires-1.C: Likewise.
	* gfortran.dg/gomp/requires-1.f90: Likewise.
	* gfortran.dg/gomp/requires-2.f90: Likewise.
	* gfortran.dg/gomp/requires-4.f90: Likewise.
	* gfortran.dg/gomp/requires-5.f90: Likewise.
	* gfortran.dg/gomp/requires-6.f90: Likewise.
	* gfortran.dg/gomp/requires-7.f90: Likewise.

 gcc/testsuite/c-c++-common/gomp/lastprivate-conditional-1.c | 2 --
 gcc/testsuite/c-c++-common/gomp/requires-1.c| 2 --
 gcc/testsuite/c-c++-common/gomp/requires-2.c| 2 --
 gcc/testsuite/c-c++-common/gomp/reverse-offload-1.c | 2 --
 gcc/testsuite/g++.dg/gomp/requires-1.C  | 2 --
 gcc/testsuite/gfortran.dg/gomp/requires-1.f90   | 2 --
 gcc/testsuite/gfortran.dg/gomp/requires-2.f90   | 2 --
 gcc/testsuite/gfortran.dg/gomp/requires-4.f90   | 1 -
 gcc/testsuite/gfortran.dg/gomp/requires-5.f90   | 2 --
 gcc/testsuite/gfortran.dg/gomp/requires-6.f90   | 2 --
 gcc/testsuite/gfortran.dg/gomp/requires-7.f90   | 1 -
 11 files changed, 20 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/gomp/lastprivate-conditional-1.c b/gcc/testsuite/c-c++-common/gomp/lastprivate-conditional-1.c
index 722aba79a52..d4ef49690e8 100644
--- a/gcc/testsuite/c-c++-common/gomp/lastprivate-conditional-1.c
+++ b/gcc/testsuite/c-c++-common/gomp/lastprivate-conditional-1.c
@@ -63,2 +62,0 @@ bar (int *p)
-
-/* { dg-prune-output "not supported yet" } */
diff --git a/gcc/testsuite/c-c++-common/gomp/requires-1.c b/gcc/testsuite/c-c++-common/gomp/requires-1.c
index e1f2e3a503f..a47ec659566 100644
--- a/gcc/testsuite/c-c++-common/gomp/requires-1.c
+++ b/gcc/testsuite/c-c++-common/gomp/requires-1.c
@@ -13,2 +12,0 @@ foo ()
-
-/* { dg-prune-output "not supported yet" } */
diff --git a/gcc/testsuite/c-c++-common/gomp/requires-2.c b/gcc/testsuite/c-c++-common/gomp/requires-2.c
index 717b65caeea..d7430b1b1a4 100644
--- a/gcc/testsuite/c-c++-common/gomp/requires-2.c
+++ b/gcc/testsuite/c-c++-common/gomp/requires-2.c
@@ -9,2 +8,0 @@
-
-/* { dg-prune-output "not supported yet" } */
diff --git a/gcc/testsuite/c-c++-common/gomp/reverse-offload-1.c b/gcc/testsuite/c-c++-common/gomp/reverse-offload-1.c
index 9a3fa5230f8..ddc3c2c6be1 100644
--- a/gcc/testsuite/c-c++-common/gomp/reverse-offload-1.c
+++ b/gcc/testsuite/c-c++-common/gomp/reverse-offload-1.c
@@ -9,2 +8,0 @@
-/* { dg-prune-output "'reverse_offload' clause on 'requires' directive not supported yet" } */
-
diff --git a/gcc/testsuite/g++.dg/gomp/requires-1.C b/gcc/testsuite/g++.dg/gomp/requires-1.C
index aefeb288dad..5ca5e006da1 100644
--- a/gcc/testsuite/g++.dg/gomp/requires-1.C
+++ b/gcc/testsuite/g++.dg/gomp/requires-1.C
@@ -11,2 +10,0 @@ namespace M {
-
-/* { dg-prune-output "not supported yet" } */
diff --git a/gcc/testsuite/gfortran.dg/gomp/requires-1.f90 b/gcc/testsuite/gfortran.dg/gomp/requires-1.f90
index b115a654e71..19007834c45 100644
--- a/gcc/testsuite/gfortran.dg/gomp/requires-1.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/requires-1.f90
@@ -12,2 +11,0 @@ end
-
-! { dg-prune-output "not yet supported" }
diff --git a/gcc/testsuite/gfortran.dg/gomp/requires-2.f90 b/gcc/testsuite/gfortran.dg/gomp/requires-2.f90
index 5f11a7bfb2a..f144d391034 100644
--- a/gcc/testsuite/gfortran.dg/gomp/requires-2.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/requires-2.f90
@@ -13,2 +12,0 @@ end
-
-! { dg-prune-output "not yet supported" }
diff --git a/gcc/testsuite/gfortran.dg/gomp/requires-4.f90 b/gcc/testsuite/gfortran.dg/gomp/requires-4.f90
index c870a2840d3..9d936197f8f 100644
--- a/gcc/testsuite/gfortran.dg/gomp/requires-4.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/requires-4.f90
@@ -36 +35,0 @@ end
-! { dg-prune-output "not yet supported" }
diff --git a/gcc/testsuite/gfortran.dg/gomp/requires-5.f90 b/gcc/testsuite/gfortran.dg/gomp/requires-5.f90
index e719e929294..87be933ba49 100644
--- a/gcc/testsuite/gfortran.dg/gomp/requires-5.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/requires-5.f90
@@ -15,2 +14,0 @@ end
-
-! { dg-prune-output "not yet supported" }
diff --git a/gcc/testsuite/gfortran.dg/gomp/requires-6.f90 b/gcc/testsuite/gfortran.dg/gomp/requires-6.f90
index cabd3d94a90..b20c218dd6b 100644
--- a/gcc/testsuite/gfortran.dg/gomp/requires-6.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/requires-6.f90
@@ -15,2 +14,0 @@ end
-
-! { dg-prune-output "not yet supported" }
diff --git a/gcc/testsuite/gfortran.dg/gomp/requires-7.f90 

[gcc r12-10476] Fortran: Fix SHAPE for zero-size arrays

2024-05-28 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:e0b2c4f90f908a9bca4038c7ae0d8ca6ee157d8f

commit r12-10476-ge0b2c4f90f908a9bca4038c7ae0d8ca6ee157d8f
Author: Tobias Burnus 
Date:   Mon May 20 08:34:48 2024 +0200

Fortran: Fix SHAPE for zero-size arrays

PR fortran/115150

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_bound): Fix SHAPE
for zero-size arrays

gcc/testsuite/ChangeLog:

* gfortran.dg/shape_12.f90: New test.

(cherry picked from commit b701306a9b38bd74cdc26c7ece5add22f2203b56)

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  4 ++-
 gcc/testsuite/gfortran.dg/shape_12.f90 | 51 ++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index c30cdfd37f9..9393ca10b06 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -3083,7 +3083,9 @@ gfc_conv_intrinsic_bound (gfc_se * se, gfc_expr * expr, 
enum gfc_isym_id op)
  lbound, gfc_index_one_node);
}
   else if (op == GFC_ISYM_SHAPE)
-   se->expr = size;
+   se->expr = fold_build2_loc (input_location, MAX_EXPR,
+   gfc_array_index_type, size,
+   gfc_index_zero_node);
   else
gcc_unreachable ();
 
diff --git a/gcc/testsuite/gfortran.dg/shape_12.f90 
b/gcc/testsuite/gfortran.dg/shape_12.f90
new file mode 100644
index 000..e672e1ff9f9
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/shape_12.f90
@@ -0,0 +1,51 @@
+! { dg-do run }
+!
+! PR fortran/115150
+!
+! Check that SHAPE handles zero-sized arrays correctly
+!
+implicit none
+call one
+call two
+
+contains
+
+subroutine one
+  real,allocatable :: A(:),B(:,:)
+  allocate(a(3:0), b(5:1, 2:5))
+
+  if (any (shape(a) /= [0])) stop 1
+  if (any (shape(b) /= [0, 4])) stop 2
+  if (size(a) /= 0) stop 3
+  if (size(b) /= 0) stop 4
+  if (any (lbound(a) /= [1])) stop 5
+  if (any (lbound(b) /= [1, 2])) stop 6
+  if (any (ubound(a) /= [0])) stop 5
+  if (any (ubound(b) /= [0,5])) stop 6
+end
+
+subroutine two
+integer :: x1(10), x2(10,10)
+call f(x1, x2, -3)
+end
+
+subroutine f(y1, y2, n)
+  integer, value :: n
+  integer :: y1(1:n)
+  integer :: y2(1:n,4,2:*)
+  call g(y1, y2)
+end
+
+subroutine g(z1, z2)
+  integer :: z1(..), z2(..)
+
+  if (any (shape(z1) /= [0])) stop 1
+  if (any (shape(z2) /= [0, 4, -1])) stop 2
+  if (size(z1) /= 0) stop 3
+  if (size(z2) /= 0) stop 4
+  if (any (lbound(z1) /= [1])) stop 5
+  if (any (lbound(z2) /= [1, 1, 1])) stop 6
+  if (any (ubound(z1) /= [0])) stop 5
+  if (any (ubound(z2) /= [0, 4, -1])) stop 6
+end
+end


[gcc r13-8805] Fortran: Fix SHAPE for zero-size arrays

2024-05-28 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:3185cfe495944e6e5d000ccd820bed2e6f10cd6c

commit r13-8805-g3185cfe495944e6e5d000ccd820bed2e6f10cd6c
Author: Tobias Burnus 
Date:   Mon May 20 08:34:48 2024 +0200

Fortran: Fix SHAPE for zero-size arrays

PR fortran/115150

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_bound): Fix SHAPE
for zero-size arrays

gcc/testsuite/ChangeLog:

* gfortran.dg/shape_12.f90: New test.

(cherry picked from commit b701306a9b38bd74cdc26c7ece5add22f2203b56)

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  4 ++-
 gcc/testsuite/gfortran.dg/shape_12.f90 | 51 ++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index aa0dea50089..455b61aa564 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -3090,7 +3090,9 @@ gfc_conv_intrinsic_bound (gfc_se * se, gfc_expr * expr, 
enum gfc_isym_id op)
  lbound, gfc_index_one_node);
}
   else if (op == GFC_ISYM_SHAPE)
-   se->expr = size;
+   se->expr = fold_build2_loc (input_location, MAX_EXPR,
+   gfc_array_index_type, size,
+   gfc_index_zero_node);
   else
gcc_unreachable ();
 
diff --git a/gcc/testsuite/gfortran.dg/shape_12.f90 
b/gcc/testsuite/gfortran.dg/shape_12.f90
new file mode 100644
index 000..e672e1ff9f9
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/shape_12.f90
@@ -0,0 +1,51 @@
+! { dg-do run }
+!
+! PR fortran/115150
+!
+! Check that SHAPE handles zero-sized arrays correctly
+!
+implicit none
+call one
+call two
+
+contains
+
+subroutine one
+  real,allocatable :: A(:),B(:,:)
+  allocate(a(3:0), b(5:1, 2:5))
+
+  if (any (shape(a) /= [0])) stop 1
+  if (any (shape(b) /= [0, 4])) stop 2
+  if (size(a) /= 0) stop 3
+  if (size(b) /= 0) stop 4
+  if (any (lbound(a) /= [1])) stop 5
+  if (any (lbound(b) /= [1, 2])) stop 6
+  if (any (ubound(a) /= [0])) stop 5
+  if (any (ubound(b) /= [0,5])) stop 6
+end
+
+subroutine two
+integer :: x1(10), x2(10,10)
+call f(x1, x2, -3)
+end
+
+subroutine f(y1, y2, n)
+  integer, value :: n
+  integer :: y1(1:n)
+  integer :: y2(1:n,4,2:*)
+  call g(y1, y2)
+end
+
+subroutine g(z1, z2)
+  integer :: z1(..), z2(..)
+
+  if (any (shape(z1) /= [0])) stop 1
+  if (any (shape(z2) /= [0, 4, -1])) stop 2
+  if (size(z1) /= 0) stop 3
+  if (size(z2) /= 0) stop 4
+  if (any (lbound(z1) /= [1])) stop 5
+  if (any (lbound(z2) /= [1, 1, 1])) stop 6
+  if (any (ubound(z1) /= [0])) stop 5
+  if (any (ubound(z2) /= [0, 4, -1])) stop 6
+end
+end


gcc-wwwdocs branch master updated. 30f0c75e77a10942590037b749a64db74b0c8480

2024-05-28 Thread Tobias Burnus via Gcc-cvs-wwwdocs
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gcc-wwwdocs".

The branch, master has been updated
   via  30f0c75e77a10942590037b749a64db74b0c8480 (commit)
  from  582d3e94dbcdf2aa63134532dc66b01d651d7a1d (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -
commit 30f0c75e77a10942590037b749a64db74b0c8480
Author: Tobias Burnus 
Date:   Tue May 28 15:20:50 2024 +0200

gcc-15/changes.html: Fortran - mention F2023 logical-kind additions

diff --git a/htdocs/gcc-15/changes.html b/htdocs/gcc-15/changes.html
index a89a7f2b..b59fd3be 100644
--- a/htdocs/gcc-15/changes.html
+++ b/htdocs/gcc-15/changes.html
@@ -49,7 +49,13 @@ a work-in-progress.
 
 
 
-
+Fortran
+
+
+  Fortran 2023: The selected_logical_kind intrinsic function
+  and, in the ISO_FORTRAN_ENV module, the named constants
+  logical{8,16,32,64} and real16 were added.
+
 
 
 

---

Summary of changes:
 htdocs/gcc-15/changes.html | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)


hooks/post-receive
-- 
gcc-wwwdocs


[gcc r14-10251] Fortran: Fix SHAPE for zero-size arrays

2024-05-28 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:dbeb3d127da07963ecaa26680da62a255199e9c2

commit r14-10251-gdbeb3d127da07963ecaa26680da62a255199e9c2
Author: Tobias Burnus 
Date:   Mon May 20 08:34:48 2024 +0200

Fortran: Fix SHAPE for zero-size arrays

PR fortran/115150

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_bound): Fix SHAPE
for zero-size arrays

gcc/testsuite/ChangeLog:

* gfortran.dg/shape_12.f90: New test.

(cherry picked from commit b701306a9b38bd74cdc26c7ece5add22f2203b56)

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  4 ++-
 gcc/testsuite/gfortran.dg/shape_12.f90 | 51 ++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 4e26af21b46..7cb7c2e6949 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -3090,7 +3090,9 @@ gfc_conv_intrinsic_bound (gfc_se * se, gfc_expr * expr, 
enum gfc_isym_id op)
  lbound, gfc_index_one_node);
}
   else if (op == GFC_ISYM_SHAPE)
-   se->expr = size;
+   se->expr = fold_build2_loc (input_location, MAX_EXPR,
+   gfc_array_index_type, size,
+   gfc_index_zero_node);
   else
gcc_unreachable ();
 
diff --git a/gcc/testsuite/gfortran.dg/shape_12.f90 
b/gcc/testsuite/gfortran.dg/shape_12.f90
new file mode 100644
index 000..e672e1ff9f9
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/shape_12.f90
@@ -0,0 +1,51 @@
+! { dg-do run }
+!
+! PR fortran/115150
+!
+! Check that SHAPE handles zero-sized arrays correctly
+!
+implicit none
+call one
+call two
+
+contains
+
+subroutine one
+  real,allocatable :: A(:),B(:,:)
+  allocate(a(3:0), b(5:1, 2:5))
+
+  if (any (shape(a) /= [0])) stop 1
+  if (any (shape(b) /= [0, 4])) stop 2
+  if (size(a) /= 0) stop 3
+  if (size(b) /= 0) stop 4
+  if (any (lbound(a) /= [1])) stop 5
+  if (any (lbound(b) /= [1, 2])) stop 6
+  if (any (ubound(a) /= [0])) stop 5
+  if (any (ubound(b) /= [0,5])) stop 6
+end
+
+subroutine two
+integer :: x1(10), x2(10,10)
+call f(x1, x2, -3)
+end
+
+subroutine f(y1, y2, n)
+  integer, value :: n
+  integer :: y1(1:n)
+  integer :: y2(1:n,4,2:*)
+  call g(y1, y2)
+end
+
+subroutine g(z1, z2)
+  integer :: z1(..), z2(..)
+
+  if (any (shape(z1) /= [0])) stop 1
+  if (any (shape(z2) /= [0, 4, -1])) stop 2
+  if (size(z1) /= 0) stop 3
+  if (size(z2) /= 0) stop 4
+  if (any (lbound(z1) /= [1])) stop 5
+  if (any (lbound(z2) /= [1, 1, 1])) stop 6
+  if (any (ubound(z1) /= [0])) stop 5
+  if (any (ubound(z2) /= [0, 4, -1])) stop 6
+end
+end


[wwwdocs][patch] gcc-15/changes.html: Fortran - mention F2023 logical-kind additions

2024-05-28 Thread Tobias Burnus
Let's make https://gcc.gnu.org/gcc-15/changes.html a bit more useful … 
While there were several useful Fortran commits already, only one seems 
to be about a new feature.


Thus, document selected_logical_kind and the ISO_FORTRAN_ENV additions.

Comments or suggestions before I commit it?

Tobias
Title: GCC 15 Release Series — Changes, New Features, and Fixes








GCC 15 Release SeriesChanges, New Features, and Fixes


This page is a "brief" summary of some of the huge number of improvements
in GCC 15.



Note: GCC 15 has not been released yet, so this document is
a work-in-progress.


Caveats

  ...




General Improvements


New Languages and Language specific improvements










Fortran


  Fortran 2023: The selected_logical_kind intrinsic function
  and, in the ISO_FORTRAN_ENV module, the named constants
  logical{8,16,32,64} and real16 were added.








New Targets and Target Specific Improvements








































Operating Systems



























Other significant improvements










Re: [PATCH 6/7] OpenMP: Fortran front-end support for dispatch + adjust_args

2024-05-28 Thread Tobias Burnus

Hi PA, hi all,

two remarks while quickly browsing the code:

Paul-Antoine Arras:

+ if (n->sym->ts.type != BT_DERIVED
+ || !n->sym->ts.u.derived->ts.is_iso_c)
+   {
+ gfc_error ("argument list item %qs in "
+"%<need_device_ptr%> at %L must be of "
+"TYPE(C_PTR)",
+n->sym->name, &n->where);


I think you need to rule out 'c_funptr' as well, e.g. via:

|| (n->sym->ts.u.derived->intmod_sym_id
!= ISOCBINDING_PTR)))

I do note that in openmp.cc, we have one check which checks explicitly 
for c_ptr and one existing one which only checks for (c_ptr or 
c_funptr); can you fix that one as well?


* * *

But I mainly miss an update to 'module.cc' for the 'declare variant' 
change; the 'adjust_args' (for 'need_device_ptr', only) list items have
to be saved in the .mod file - otherwise the following will not work:

-aux.f90
! { dg-do compile { target skip-all-targets } }
module my_mod
  ...
  !$omp declare variant ... adjust_args(need_device_ptr: ...)
  ...
end module

.f90
{ dg-do ...
! { dg-additional-sources -aux.f90 }
  ...
  call 
  ...
  !$omp dispatch
   call 
end


For C++ modules, it should be fine as, for those, the tree is dumped.

Tobias


Re: [Patch] Fortran: invoke.texi - link to OpenCoarrays.org + mention libcaf_single

2024-05-21 Thread Tobias Burnus

Hi Bernhard,

rep.dot@gmail.com wrote:

library such as @url{http://opencoarrays.org} needs to be linked.

Maybe use https?


Works, but as the certificate is not valid, it requires ignoring the 
errors in a browser, which is a worse user experience.


The error is, e.g.,

"curl: (60) SSL certificate problem: self-signed certificate"

Or at 
https://www.ssllabs.com/ssltest/analyze.html?d=www.opencoarrays.org&hideResults=on


"Common names: invalid-sni.invalid / Issuer: invalid-sni.invalid  
(Self-signed)"


@Damian: Can you fix the server to actually have a valid certificate?

Tobias


[gcc r15-749] contrib/gcc-changelog/git_update_version.py: Improve diagnostic

2024-05-21 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:9596f6567ce6fdf94227b97ac28d3549f421ef73

commit r15-749-g9596f6567ce6fdf94227b97ac28d3549f421ef73
Author: Tobias Burnus 
Date:   Tue May 21 10:13:13 2024 +0200

contrib/gcc-changelog/git_update_version.py: Improve diagnostic

contrib/ChangeLog:

* gcc-changelog/git_update_version.py: Add '-i'/'--ignore' argument
to add to-be-ignored commits via the command line.
(ignored_commits): Rename from IGNORED_COMMITS and change
type from tuple to set.
(prepend_to_changelog_files): Show git hash if errors occurred.
(update_current_branch): Mark argument as optional by defaulting
to None.

Diff:
---
 contrib/gcc-changelog/git_update_version.py | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/contrib/gcc-changelog/git_update_version.py 
b/contrib/gcc-changelog/git_update_version.py
index 24f6c43d0b2..c69a3a6897a 100755
--- a/contrib/gcc-changelog/git_update_version.py
+++ b/contrib/gcc-changelog/git_update_version.py
@@ -22,6 +22,7 @@ import argparse
 import datetime
 import logging
 import os
+import re
 
 from git import Repo
 
@@ -30,7 +31,7 @@ from git_repository import parse_git_revisions
 current_timestamp = datetime.datetime.now().strftime('%Y%m%d\n')
 
 # Skip the following commits, they cannot be correctly processed
-IGNORED_COMMITS = (
+ignored_commits = {
 'c2be82058fb40f3ae891c68d185ff53e07f14f45',
 '04a040d907a83af54e0a98bdba5bfabc0ef4f700',
 '2e96b5f14e4025691b57d2301d71aa6092ed44bc',
@@ -41,7 +42,7 @@ IGNORED_COMMITS = (
 '040e5b0edbca861196d9e2ea2af5e805769c8d5d',
 '8057f9aa1f7e70490064de796d7a8d42d446caf8',
 '109f1b28fc94c93096506e3df0c25e331cef19d0',
-'39f81924d88e3cc197fc3df74204c9b5e01e12f7')
+'39f81924d88e3cc197fc3df74204c9b5e01e12f7'}
 
 FORMAT = '%(asctime)s:%(levelname)s:%(name)s:%(message)s'
 logging.basicConfig(level=logging.INFO, format=FORMAT,
@@ -58,6 +59,7 @@ def read_timestamp(path):
 
 def prepend_to_changelog_files(repo, folder, git_commit, add_to_git):
 if not git_commit.success:
+logging.info(f"While processing {git_commit.info.hexsha}:")
 for error in git_commit.errors:
 logging.info(error)
 raise AssertionError()
@@ -93,13 +95,15 @@ parser.add_argument('-d', '--dry-mode',
  ' is expected')
 parser.add_argument('-c', '--current', action='store_true',
 help='Modify current branch (--push argument is ignored)')
+parser.add_argument('-i', '--ignore', action='append',
+help='list of commits to ignore')
 args = parser.parse_args()
 
 repo = Repo(args.git_path)
 origin = repo.remotes['origin']
 
 
-def update_current_branch(ref_name):
+def update_current_branch(ref_name=None):
 commit = repo.head.commit
 commit_count = 1
 while commit:
@@ -123,7 +127,7 @@ def update_current_branch(ref_name):
 head = head.parents[1]
 commits = parse_git_revisions(args.git_path, '%s..%s'
   % (commit.hexsha, head.hexsha), ref_name)
-commits = [c for c in commits if c.info.hexsha not in IGNORED_COMMITS]
+commits = [c for c in commits if c.info.hexsha not in ignored_commits]
 for git_commit in reversed(commits):
 prepend_to_changelog_files(repo, args.git_path, git_commit,
not args.dry_mode)
@@ -153,6 +157,9 @@ def update_current_branch(ref_name):
 else:
 logging.info('DATESTAMP unchanged')
 
+if args.ignore is not None:
+for item in args.ignore:
+ignored_commits.update(set(i for i in re.split(r'\s*,\s*|\s+', item)))
 
 if args.current:
 logging.info('=== Working on the current branch ===')


Re: [Patch] contrib/gcc-changelog/git_update_version.py: Improve diagnostic

2024-05-21 Thread Tobias Burnus

Hi Jakub,

Jakub Jelinek wrote:

On Mon, May 20, 2024 at 08:31:02AM +0200, Tobias Burnus wrote:

Hmm, there were now two daily bumps: [...] I really wonder why.

Because I've done it by hand.


Okay, that explains it.

I still do not understand why it slipped through in the first place; I
tried old versions down to r12-709-g772e5e82e3114f and they still FAIL for
the invalid commit ("ERR: cannot find a ChangeLog location in message").


Thus, I wonder whether the commit hook is active at all?!?


I have in ~gccadmin a gcc-changelog copy and adjusted update_version_git
script which doesn't use contrib/gcc-changelog subdirectory from the
checkout it makes but from the ~gccadmin directory,

[...]

I'm already using something similar in
my hack (just was doing it for even successful commits, but I think your
patch is better).
And, I think best would be if update_version_git script simply
accepted a list of ignored commits from the command line too,
passed it to the git_update_version.py script and that one
added those to IGNORED_COMMITS.


Updated version:

* Uses my diagnostic

* Adds an -i/--ignore argument for commits. It permits using '-i hash1 -i
hash2' but also '-i hash1,hash2' or '-i "hash1 hash2"'.


* I changed the global variable to lower case, as Python's style guide
states that all-uppercase names are reserved for constants.


* The '=None' matches one of the current usages (no argument passed); 
hence, it is now explicit and 'pylint' is happy.


OK for mainline?

Tobias

PS: I have not updated the hashes. If needed/wanted, I leave that to 
you, Jakub.
contrib/gcc-changelog/git_update_version.py: Improve diagnostic

contrib/ChangeLog:

	* gcc-changelog/git_update_version.py: Add '-i'/'--ignore' argument
	to add to-be-ignored commits via the command line.
	(ignored_commits): Rename from IGNORED_COMMITS and change
	type from tuple to set.
	(prepend_to_changelog_files): Show git hash if errors occurred.
	(update_current_branch): Mark argument as optional by defaulting
	to None.

 contrib/gcc-changelog/git_update_version.py | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/contrib/gcc-changelog/git_update_version.py b/contrib/gcc-changelog/git_update_version.py
index 24f6c43d0b2..c69a3a6897a 100755
--- a/contrib/gcc-changelog/git_update_version.py
+++ b/contrib/gcc-changelog/git_update_version.py
@@ -22,6 +22,7 @@ import argparse
 import datetime
 import logging
 import os
+import re
 
 from git import Repo
 
@@ -30,7 +31,7 @@ from git_repository import parse_git_revisions
 current_timestamp = datetime.datetime.now().strftime('%Y%m%d\n')
 
 # Skip the following commits, they cannot be correctly processed
-IGNORED_COMMITS = (
+ignored_commits = {
 'c2be82058fb40f3ae891c68d185ff53e07f14f45',
 '04a040d907a83af54e0a98bdba5bfabc0ef4f700',
 '2e96b5f14e4025691b57d2301d71aa6092ed44bc',
@@ -41,7 +42,7 @@ IGNORED_COMMITS = (
 '040e5b0edbca861196d9e2ea2af5e805769c8d5d',
 '8057f9aa1f7e70490064de796d7a8d42d446caf8',
 '109f1b28fc94c93096506e3df0c25e331cef19d0',
-'39f81924d88e3cc197fc3df74204c9b5e01e12f7')
+'39f81924d88e3cc197fc3df74204c9b5e01e12f7'}
 
 FORMAT = '%(asctime)s:%(levelname)s:%(name)s:%(message)s'
 logging.basicConfig(level=logging.INFO, format=FORMAT,
@@ -58,6 +59,7 @@ def read_timestamp(path):
 
 def prepend_to_changelog_files(repo, folder, git_commit, add_to_git):
 if not git_commit.success:
+logging.info(f"While processing {git_commit.info.hexsha}:")
 for error in git_commit.errors:
 logging.info(error)
 raise AssertionError()
@@ -93,13 +95,15 @@ parser.add_argument('-d', '--dry-mode',
  ' is expected')
 parser.add_argument('-c', '--current', action='store_true',
 help='Modify current branch (--push argument is ignored)')
+parser.add_argument('-i', '--ignore', action='append',
+help='list of commits to ignore')
 args = parser.parse_args()
 
 repo = Repo(args.git_path)
 origin = repo.remotes['origin']
 
 
-def update_current_branch(ref_name):
+def update_current_branch(ref_name=None):
 commit = repo.head.commit
 commit_count = 1
 while commit:
@@ -123,7 +127,7 @@ def update_current_branch(ref_name):
 head = head.parents[1]
 commits = parse_git_revisions(args.git_path, '%s..%s'
   % (commit.hexsha, head.hexsha), ref_name)
-commits = [c for c in commits if c.info.hexsha not in IGNORED_COMMITS]
+commits = [c for c in commits if c.info.hexsha not in ignored_commits]
 for git_commit in reversed(commits):
 prepend_to_changelog_files(repo, args.git_path, git_commit,
not args.dry_mode)
@@ -153,6 +157,9 @@ def update_current_branch(ref_name):
 else:
 logging.info('DATESTAMP unchanged')
 
+if args.ignore is not None:
+  

[gcc r15-658] Fortran: Fix SHAPE for zero-size arrays

2024-05-20 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:b701306a9b38bd74cdc26c7ece5add22f2203b56

commit r15-658-gb701306a9b38bd74cdc26c7ece5add22f2203b56
Author: Tobias Burnus 
Date:   Mon May 20 08:34:48 2024 +0200

Fortran: Fix SHAPE for zero-size arrays

PR fortran/115150

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_bound): Fix SHAPE
for zero-size arrays

gcc/testsuite/ChangeLog:

* gfortran.dg/shape_12.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  4 ++-
 gcc/testsuite/gfortran.dg/shape_12.f90 | 51 ++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 80dc3426ab04..912c1000e186 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -3090,7 +3090,9 @@ gfc_conv_intrinsic_bound (gfc_se * se, gfc_expr * expr, 
enum gfc_isym_id op)
  lbound, gfc_index_one_node);
}
   else if (op == GFC_ISYM_SHAPE)
-   se->expr = size;
+   se->expr = fold_build2_loc (input_location, MAX_EXPR,
+   gfc_array_index_type, size,
+   gfc_index_zero_node);
   else
gcc_unreachable ();
 
diff --git a/gcc/testsuite/gfortran.dg/shape_12.f90 
b/gcc/testsuite/gfortran.dg/shape_12.f90
new file mode 100644
index ..e672e1ff9f95
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/shape_12.f90
@@ -0,0 +1,51 @@
+! { dg-do run }
+!
+! PR fortran/115150
+!
+! Check that SHAPE handles zero-sized arrays correctly
+!
+implicit none
+call one
+call two
+
+contains
+
+subroutine one
+  real,allocatable :: A(:),B(:,:)
+  allocate(a(3:0), b(5:1, 2:5))
+
+  if (any (shape(a) /= [0])) stop 1
+  if (any (shape(b) /= [0, 4])) stop 2
+  if (size(a) /= 0) stop 3
+  if (size(b) /= 0) stop 4
+  if (any (lbound(a) /= [1])) stop 5
+  if (any (lbound(b) /= [1, 2])) stop 6
+  if (any (ubound(a) /= [0])) stop 5
+  if (any (ubound(b) /= [0,5])) stop 6
+end
+
+subroutine two
+integer :: x1(10), x2(10,10)
+call f(x1, x2, -3)
+end
+
+subroutine f(y1, y2, n)
+  integer, value :: n
+  integer :: y1(1:n)
+  integer :: y2(1:n,4,2:*)
+  call g(y1, y2)
+end
+
+subroutine g(z1, z2)
+  integer :: z1(..), z2(..)
+
+  if (any (shape(z1) /= [0])) stop 1
+  if (any (shape(z2) /= [0, 4, -1])) stop 2
+  if (size(z1) /= 0) stop 3
+  if (size(z2) /= 0) stop 4
+  if (any (lbound(z1) /= [1])) stop 5
+  if (any (lbound(z2) /= [1, 1, 1])) stop 6
+  if (any (ubound(z1) /= [0])) stop 5
+  if (any (ubound(z2) /= [0, 4, -1])) stop 6
+end
+end


[gcc r15-657] Fortran: invoke.texi - link to OpenCoarrays.org + mention libcaf_single

2024-05-20 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:544d5dcc9150c0ea278fba79ea515f5a87732ce7

commit r15-657-g544d5dcc9150c0ea278fba79ea515f5a87732ce7
Author: Tobias Burnus 
Date:   Mon May 20 08:33:31 2024 +0200

Fortran: invoke.texi - link to OpenCoarrays.org + mention libcaf_single

gcc/fortran/ChangeLog:

* invoke.texi (fcoarray): Link to OpenCoarrays.org;
mention libcaf_single.

Diff:
---
 gcc/fortran/invoke.texi | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/invoke.texi b/gcc/fortran/invoke.texi
index 40e8e4a7cdde..6bc42afe2c4f 100644
--- a/gcc/fortran/invoke.texi
+++ b/gcc/fortran/invoke.texi
@@ -1753,7 +1753,10 @@ Single-image mode, i.e. @code{num_images()} is always 
one.
 
 @item @samp{lib}
 Library-based coarray parallelization; a suitable GNU Fortran coarray
-library needs to be linked.
+library such as @url{http://opencoarrays.org} needs to be linked.
+Alternatively, GCC's @code{libcaf_single} library can be linked,
+albeit it only supports a single image.
+
 @end table


[Patch] contrib/gcc-changelog/git_update_version.py: Improve diagnostic (was: [Patch] contrib/gcc-changelog/git_update_version.py: Add ignore commit, improve diagnostic)

2024-05-20 Thread Tobias Burnus

Hmm, there were now two daily bumps:

Date:   Mon May 20 00:16:30 2024 +

Date:   Sun May 19 18:15:28 2024 +

I really wonder why.

I guess, the 'ignore commit' is hence not needed – but I think the 
improved diagnostic part still makes sense.


See updated patch.

On May 19, 24 Tobias Burnus wrote:

I noticed that the last bump happened on Thursday.

* * *

The error is according to
https://gcc.gnu.org/pipermail/gccadmin/2024q2/021298.html

2024-05-19 00:17:28,643:INFO:root:cannot find a ChangeLog location in 
message


That's the commit
---
    Revert "Revert: "Enable prange support.""

    This reverts commit d7bb8eaade3cd3aa70715c8567b4d7b08098e699 and 
enables prange

    support again.
---

* * * The attached patch adds this commit to the ignore list and helps 
with the diagnosis by showing the failing hash in the error message.


OK for mainline?

Post commit: Can someone install the new version + fix the ChangeLog 
for the ignored commit?


* * *

What I do not understand: Why does this commit get applied? I do see 
for both


contrib/gcc-changelog/git_check_commit.py -v -p 
da73261ce7731be7f2b164f1db796878cdc23365


and

contrib/gcc-changelog/git_email.py 
0001-Revert-Revert-Enable-prange-support.patch the error above. - And 
I do not understand why it made it past the commit check but now fails?


Likewise for 8057f9aa1f7e70490064de796d7a8d42d446caf8

Does the commit hook use an older version of the check scripts? Does 
it ignore the errors? Or what goes wrong here? Any idea?


Tobias

From f56b1764f2b5c2c83c6852607405e5be0a763a2c Mon Sep 17 00:00:00 2001
From: Tobias Burnus 
Date: Sun, 19 May 2024 08:17:42 +0200
Subject: [PATCH] contrib/gcc-changelog/git_update_version.py: Improve diagnostic

contrib/ChangeLog:

* gcc-changelog/git_update_version.py (prepend_to_changelog_files): Output
	git hash in case errors occurred.

diff --git a/contrib/gcc-changelog/git_update_version.py b/contrib/gcc-changelog/git_update_version.py
index 24f6c43d0b2..ec0151b83fe 100755
--- a/contrib/gcc-changelog/git_update_version.py
+++ b/contrib/gcc-changelog/git_update_version.py
@@ -58,6 +58,7 @@ def read_timestamp(path):
 
 def prepend_to_changelog_files(repo, folder, git_commit, add_to_git):
 if not git_commit.success:
+logging.info(f"While processing {git_commit.info.hexsha}:")
 for error in git_commit.errors:
 logging.info(error)
 raise AssertionError()
-- 
2.45.0



[Patch] Fortran: Fix SHAPE for zero-size arrays

2024-05-19 Thread Tobias Burnus
That is for https://gcc.gnu.org/PR115150 – a GCC 12/13/14/15 regression, 
caused when switching from a libgomp call to inline code and missing the 
corner case of zero-size arrays ...


OK for mainline + all affected branches?

Tobias
Fortran: Fix SHAPE for zero-size arrays

	PR fortran/115150

gcc/fortran/ChangeLog:

	* trans-intrinsic.cc (gfc_conv_intrinsic_bound): Fix SHAPE
	for zero-size arrays

gcc/testsuite/ChangeLog:

	* gfortran.dg/shape_12.f90: New test.

 gcc/fortran/trans-intrinsic.cc |  4 ++-
 gcc/testsuite/gfortran.dg/shape_12.f90 | 51 ++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 80dc3426ab0..912c1000e18 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -3090,7 +3090,9 @@ gfc_conv_intrinsic_bound (gfc_se * se, gfc_expr * expr, enum gfc_isym_id op)
   lbound, gfc_index_one_node);
 	}
   else if (op == GFC_ISYM_SHAPE)
-	se->expr = size;
+	se->expr = fold_build2_loc (input_location, MAX_EXPR,
+gfc_array_index_type, size,
+gfc_index_zero_node);
   else
 	gcc_unreachable ();
 
diff --git a/gcc/testsuite/gfortran.dg/shape_12.f90 b/gcc/testsuite/gfortran.dg/shape_12.f90
new file mode 100644
index 000..e672e1ff9f9
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/shape_12.f90
@@ -0,0 +1,51 @@
+! { dg-do run }
+!
+! PR fortran/115150
+!
+! Check that SHAPE handles zero-sized arrays correctly
+!
+implicit none
+call one
+call two
+
+contains
+
+subroutine one
+  real,allocatable :: A(:),B(:,:)
+  allocate(a(3:0), b(5:1, 2:5))
+
+  if (any (shape(a) /= [0])) stop 1
+  if (any (shape(b) /= [0, 4])) stop 2
+  if (size(a) /= 0) stop 3
+  if (size(b) /= 0) stop 4
+  if (any (lbound(a) /= [1])) stop 5
+  if (any (lbound(b) /= [1, 2])) stop 6
+  if (any (ubound(a) /= [0])) stop 5
+  if (any (ubound(b) /= [0,5])) stop 6
+end
+
+subroutine two
+integer :: x1(10), x2(10,10)
+call f(x1, x2, -3)
+end
+
+subroutine f(y1, y2, n)
+  integer, value :: n
+  integer :: y1(1:n)
+  integer :: y2(1:n,4,2:*)
+  call g(y1, y2)
+end
+
+subroutine g(z1, z2)
+  integer :: z1(..), z2(..)
+
+  if (any (shape(z1) /= [0])) stop 1
+  if (any (shape(z2) /= [0, 4, -1])) stop 2
+  if (size(z1) /= 0) stop 3
+  if (size(z2) /= 0) stop 4
+  if (any (lbound(z1) /= [1])) stop 5
+  if (any (lbound(z2) /= [1, 1, 1])) stop 6
+  if (any (ubound(z1) /= [0])) stop 5
+  if (any (ubound(z2) /= [0, 4, -1])) stop 6
+end
+end


[Patch] Fortran: invoke.texi - link to OpenCoarrays.org + mention libcaf_single

2024-05-19 Thread Tobias Burnus
I noticed that gfortran's coarray support did not link to the 
http://www.opencoarrays.org/


As that library is needed to support parallelization, it makes sense to 
have the link.


Motivated by someone claiming at ISC-HPC that GCC only supports a single 
image.


And also motivated by Damian's presentation, which showed that 
gfortran's coarrays could successfully run the ICAR atmospheric model 
with 25,600 processes (OpenCoarrays with OpenSHMEM backend), which 
definitely is more than one image :-)


I think mentioning the existing libcaf_single is still useful, even
though it is only of limited use (except that it ships with GCC and
permits some testing; in particular, it is used by GCC's testsuite).


OK for mainline?

Tobias
Fortran: invoke.texi - link to OpenCoarrays.org + mention libcaf_single

gcc/fortran/ChangeLog:

	* invoke.texi (fcoarray): Link to OpenCoarrays.org;
	mention libcaf_single.

 gcc/fortran/invoke.texi | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/invoke.texi b/gcc/fortran/invoke.texi
index 40e8e4a7cdd..78a2910b8d8 100644
--- a/gcc/fortran/invoke.texi
+++ b/gcc/fortran/invoke.texi
@@ -1753,7 +1753,10 @@ Single-image mode, i.e. @code{num_images()} is always one.
 
 @item @samp{lib}
 Library-based coarray parallelization; a suitable GNU Fortran coarray
-library needs to be linked.
+library needs to be linked such as @url{http://opencoarrays.org}.
+Alternatively, GCC's @code{libcaf_single} library can be linked,
+albeit it only supports a single image.
+
 @end table
 
 


[Patch] contrib/gcc-changelog/git_update_version.py: Add ignore commit, improve diagnostic

2024-05-19 Thread Tobias Burnus

I noticed that the last bump happened on Thursday.

* * *

The error is according to
https://gcc.gnu.org/pipermail/gccadmin/2024q2/021298.html

2024-05-19 00:17:28,643:INFO:root:cannot find a ChangeLog location in message

That's the commit
---
Revert "Revert: "Enable prange support.""

This reverts commit d7bb8eaade3cd3aa70715c8567b4d7b08098e699 and enables 
prange
support again.
---

* * *

The attached patch adds this commit to the ignore list and helps
with the diagnosis by showing the failing hash in the error message.

OK for mainline?

Post commit: Can someone install the new version + fix the
ChangeLog for the ignored commit?

* * *

What I do not understand: Why does this commit get applied? I do see
the error above for both

contrib/gcc-changelog/git_check_commit.py -v -p
da73261ce7731be7f2b164f1db796878cdc23365

and

contrib/gcc-changelog/git_email.py
0001-Revert-Revert-Enable-prange-support.patch

And I do not understand why it made it past the commit check but now fails?

Likewise for 8057f9aa1f7e70490064de796d7a8d42d446caf8

Does the commit hook use an older version of the check scripts? Does it
ignore the errors? Or what goes wrong here? Any idea?

Tobias
From f56b1764f2b5c2c83c6852607405e5be0a763a2c Mon Sep 17 00:00:00 2001
From: Tobias Burnus 
Date: Sun, 19 May 2024 08:17:42 +0200
Subject: [PATCH] contrib/gcc-changelog/git_update_version.py: Add ignore
 commit, improve diagnostic

contrib/ChangeLog:

* gcc-changelog/git_update_version.py (IGNORED_COMMITS): Add
	cfceb070e2aea3cef9bd1f50d8d030c51449f45b.
	(prepend_to_changelog_files): Output git hash in case of error.

diff --git a/contrib/gcc-changelog/git_update_version.py b/contrib/gcc-changelog/git_update_version.py
index 24f6c43d0b2..ec0151b83fe 100755
--- a/contrib/gcc-changelog/git_update_version.py
+++ b/contrib/gcc-changelog/git_update_version.py
@@ -41,7 +41,8 @@ IGNORED_COMMITS = (
 '040e5b0edbca861196d9e2ea2af5e805769c8d5d',
 '8057f9aa1f7e70490064de796d7a8d42d446caf8',
 '109f1b28fc94c93096506e3df0c25e331cef19d0',
-'39f81924d88e3cc197fc3df74204c9b5e01e12f7')
+'39f81924d88e3cc197fc3df74204c9b5e01e12f7',
+'da73261ce7731be7f2b164f1db796878cdc23365')
 
 FORMAT = '%(asctime)s:%(levelname)s:%(name)s:%(message)s'
 logging.basicConfig(level=logging.INFO, format=FORMAT,
@@ -58,6 +59,7 @@ def read_timestamp(path):
 
 def prepend_to_changelog_files(repo, folder, git_commit, add_to_git):
 if not git_commit.success:
+logging.info(f"While processing {git_commit.info.hexsha}:")
 for error in git_commit.errors:
 logging.info(error)
 raise AssertionError()
-- 
2.45.0



[wwwdocs,committed] projects/gomp: Update doc links for GCC 14

2024-05-14 Thread Tobias Burnus

Minor update – to include GCC 14 and update mainline to 15.

I also updated the doc links to point to the latest release; it shouldn't
matter for the status but it is nicer nonetheless.


Tobias
commit 6d76756d2070040c35e7991a626805a736edea1d
Author: Tobias Burnus 
Date:   Tue May 14 09:34:47 2024 +0200

projects/gomp: Update doc links for GCC 14

And link to latest GCC 12 + 13 release version

diff --git a/htdocs/projects/gomp/index.html b/htdocs/projects/gomp/index.html
index 05b81f1e..94bda5ff 100644
--- a/htdocs/projects/gomp/index.html
+++ b/htdocs/projects/gomp/index.html
@@ -144,10 +144,12 @@ filing a bug report.
 
 Implementation status in libgomp manual:
 https://gcc.gnu.org/onlinedocs/libgomp/OpenMP-Implementation-Status.html;
->Mainline (GCC 14),
-https://gcc.gnu.org/onlinedocs/gcc-13.1.0/libgomp/OpenMP-Implementation-Status.html;
+>Mainline (GCC 15),
+https://gcc.gnu.org/onlinedocs/gcc-14.1.0/libgomp/OpenMP-Implementation-Status.html;
+>GCC 14,
+https://gcc.gnu.org/onlinedocs/gcc-13.2.0/libgomp/OpenMP-Implementation-Status.html;
 >GCC 13,
-https://gcc.gnu.org/onlinedocs/gcc-12.1.0/libgomp/OpenMP-Implementation-Status.html;
+https://gcc.gnu.org/onlinedocs/gcc-12.3.0/libgomp/OpenMP-Implementation-Status.html;
 >GCC 12.
 
 Disclaimer: A feature might be only fully supported in a later GCC version


gcc-wwwdocs branch master updated. 6d76756d2070040c35e7991a626805a736edea1d

2024-05-14 Thread Tobias Burnus via Gcc-cvs-wwwdocs
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gcc-wwwdocs".

The branch, master has been updated
   via  6d76756d2070040c35e7991a626805a736edea1d (commit)
  from  de51d0fe7b7f29ce6037224f33a3d82281aac88e (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -
commit 6d76756d2070040c35e7991a626805a736edea1d
Author: Tobias Burnus 
Date:   Tue May 14 09:34:47 2024 +0200

projects/gomp: Update doc links for GCC 14

And link to latest GCC 12 + 13 release version

diff --git a/htdocs/projects/gomp/index.html b/htdocs/projects/gomp/index.html
index 05b81f1e..94bda5ff 100644
--- a/htdocs/projects/gomp/index.html
+++ b/htdocs/projects/gomp/index.html
@@ -144,10 +144,12 @@ filing a bug report.
 
 Implementation status in libgomp manual:
 https://gcc.gnu.org/onlinedocs/libgomp/OpenMP-Implementation-Status.html;
->Mainline (GCC 14),
-https://gcc.gnu.org/onlinedocs/gcc-13.1.0/libgomp/OpenMP-Implementation-Status.html;
+>Mainline (GCC 15),
+https://gcc.gnu.org/onlinedocs/gcc-14.1.0/libgomp/OpenMP-Implementation-Status.html;
+>GCC 14,
+https://gcc.gnu.org/onlinedocs/gcc-13.2.0/libgomp/OpenMP-Implementation-Status.html;
 >GCC 13,
-https://gcc.gnu.org/onlinedocs/gcc-12.1.0/libgomp/OpenMP-Implementation-Status.html;
+https://gcc.gnu.org/onlinedocs/gcc-12.3.0/libgomp/OpenMP-Implementation-Status.html;
 >GCC 12.
 
 Disclaimer: A feature might be only fully supported in a later GCC version

---

Summary of changes:
 htdocs/projects/gomp/index.html | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)


hooks/post-receive
-- 
gcc-wwwdocs


[patch] [gcn][nvptx] Add warning to mkoffload for 32bit host code

2024-04-25 Thread Tobias Burnus

Motivated by a colleague's surprise that with -m32
no offload dumps were created; that's because mkoffload
does not process host binaries when they are 32-bit (i.e. ilp32).

Internally, that is done as follows: The host compiler passes to
'mkoffload' the used host ABI, i.e. -foffload-abi=ilp32 or -foffload-abi=lp64

That's done via TARGET_OFFLOAD_OPTIONS, which is supported by aarch64, i386, 
and rs6000.

While it is sensible (albeit not strictly required) that GCC requires that
the host and device side agree and that only 64bit is implemented for the
device side, it can be confusing that silently no offloading code is generated.


Hence, I propose to print a warning in that case - as implemented in the 
attached patch:

$ gcc -fopenmp -m32 test.c
nvptx mkoffload: warning: offload code generation skipped: offloading with 
32-bit host code is currently not supported
gcn mkoffload: warning: offload code generation skipped: offloading with 32-bit 
host code is currently not supported

* * *

This shouldn't have any effect on offload builds using -m64
and non-offload builds – while several testcases already have
issues with '-m32' when offloading is enabled or an offloading
device is available.

To avoid making it worse, this patch adds some pruning and, for
a subset of the failing testcases, I added code to avoid FAILs.
There are some more FAILs, but those aren't new.

Comments, remarks, suggestions?
Is the mkoffload.cc part okay?

Tobias
[gcn][nvptx] Add warning to mkoffload for 32bit host code

mkoffload in principle handles 32bit and 64bit offload targets,
but 32bit support has not been implemented.  Before this patch,
offloading was then silently disabled for the respective target.

With the patch, the user gets a warning from mkoffload (and the
program continues to be built without offloading code).

gcc/ChangeLog:

	* config/gcn/mkoffload.cc (main): Warn for -foffload-abi=ilp32
	that no offload code will be generated.
	* config/nvptx/mkoffload.cc (main): Likewise.

libgomp/ChangeLog:

	* testsuite/lib/libgomp-dg.exp (libgomp-dg-prune): Prune warning
	by mkoffload that 32-bit offloading is not supported.
	* testsuite/libgomp.c-c++-common/requires-1.c: Silence a FAIL for
	'ia32' targets as for them no offload code is generated.
	* testsuite/libgomp.c-c++-common/requires-3.c: Likewise.
	* testsuite/libgomp.c-c++-common/requires-7.c: Likewise.
	* testsuite/libgomp.c-c++-common/variable-not-offloaded.c: Likewise.
	* testsuite/libgomp.fortran/requires-1.f90: Likewise.

 gcc/config/gcn/mkoffload.cc|  5 -
 gcc/config/nvptx/mkoffload.cc  |  5 -
 libgomp/testsuite/lib/libgomp-dg.exp   |  3 +++
 libgomp/testsuite/libgomp.c-c++-common/requires-1.c|  8 +---
 libgomp/testsuite/libgomp.c-c++-common/requires-3.c|  8 +---
 libgomp/testsuite/libgomp.c-c++-common/requires-7.c| 10 ++
 .../testsuite/libgomp.c-c++-common/variable-not-offloaded.c|  4 ++--
 libgomp/testsuite/libgomp.fortran/requires-1.f90   |  8 +---
 8 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index 9a438de331a..c37c269d4d2 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -1143,7 +1143,10 @@ main (int argc, char **argv)
 fatal_error (input_location, "cannot open %qs", gcn_cfile_name);
 
   /* Currently, we only support offloading in 64-bit configurations.  */
-  if (offload_abi == OFFLOAD_ABI_LP64)
+  if (offload_abi == OFFLOAD_ABI_ILP32)
+warning (0, "offload code generation skipped: offloading with 32-bit host "
+		"code is currently not supported");
+  else if (offload_abi == OFFLOAD_ABI_LP64)
 {
   const char *mko_dumpbase = concat (dumppfx, ".mkoffload", NULL);
   const char *hsaco_dumpbase = concat (dumppfx, ".mkoffload.hsaco", NULL);
diff --git a/gcc/config/nvptx/mkoffload.cc b/gcc/config/nvptx/mkoffload.cc
index 503b1abcefd..a7ff32cf8bd 100644
--- a/gcc/config/nvptx/mkoffload.cc
+++ b/gcc/config/nvptx/mkoffload.cc
@@ -798,7 +798,10 @@ main (int argc, char **argv)
 
   /* PR libgomp/65099: Currently, we only support offloading in 64-bit
  configurations.  */
-  if (offload_abi == OFFLOAD_ABI_LP64)
+  if (offload_abi == OFFLOAD_ABI_ILP32)
+warning (0, "offload code generation skipped: offloading with 32-bit host "
+		"code is currently not supported");
+  else if (offload_abi == OFFLOAD_ABI_LP64)
 {
   char *mko_dumpbase = concat (dumppfx, ".mkoffload", NULL);
   if (save_temps)
diff --git a/libgomp/testsuite/lib/libgomp-dg.exp b/libgomp/testsuite/lib/libgomp-dg.exp
index ebf78e17e6d..9c9a5f2ed4b 100644
--- a/libgomp/testsuite/lib/libgomp-dg.exp
+++ b/libgomp/testsuite/lib/libgomp-dg.exp
@@ -3,5 +3,8 @@ proc libgomp-dg-test { prog do_what extra_tool_flags } {
 }
 
 proc libgomp-dg-prune { system text } {
+global additional_prunes
+

Generated files in libgfortran for Fortran intrinsic procedures (was: Updated Sourceware infrastructure plans)

2024-04-18 Thread Tobias Burnus

Hi Janne,

Janne Blomqvist wrote:

back when I was active I did think about this
issue. IMHO the best of my ideas was to convert these into C++
templates.


I think this will work – but we have to be super careful:

With C++, there is the problem that we definitely do not want to add a
dependency on libstdc++ nor use features which require special
hardware support (like exceptions [always bad], symbol aliases, ...). —
On some systems, full C++ support might not be available, like
embedded systems (including some odd embedded OS) or offloading devices.


The libstdc++ dependency would be detected by linking, as we currently
do. For in-language features, we have to ensure that the appropriate flags
are used, such as -fno-exceptions (and probably a few more). And it should
be clear which language features may be used.


If we do, I think that would surely be an option.


What we're essentially doing with the M4 stuff and the
proposed in-house Python reimplementation is to make up for lack of
monomorphization in plain old C. Rather than doing some DIY templates,
switch the implementation language to something which has that feature
built-in, in this case C++.  No need to convert the entire libgfortran
to C++ if you don't want to, just those objects that are generated
from the M4 templates. Something like

template <typename T>
void matmul(T* a, T* b, T* c, ...)
{
// actual matmul code here
}

extern "C" {
   // Instantiate template for every type and export the symbol
   void matmul_r4(gfc_array_r4* a, gfc_array_r4* b, gfc_array_r4* c, ...)
   {
 matmul(a, b, c, ...);
   }
   // And so on for other types
}


Cheers,

Tobias


gcc-wwwdocs branch master updated. 794555052d5c1d9a92298aba1fc4b645042946dd

2024-04-16 Thread Tobias Burnus via Gcc-cvs-wwwdocs
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gcc-wwwdocs".

The branch, master has been updated
   via  794555052d5c1d9a92298aba1fc4b645042946dd (commit)
  from  c5e08294215518f00e9762cebe3d6f46f1f00526 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -
commit 794555052d5c1d9a92298aba1fc4b645042946dd
Author: Tobias Burnus 
Date:   Tue Apr 16 09:57:57 2024 +0200

gcc-14/changes.html + projects/gomp/: Fix OpenMP/OpenACC changes 
section/anchor

In earlier release notes, OpenMP and OpenACC changes were under "New
Languages and Language specific improvements", either directly under that
section as in 4.2, 4.4, 4.7, 4.9, 5, 6 (+ c-family + Fortran), 10, 11, and 
12
or under a subsection in 4.5 (Fortran), 4.8 (C++), 7 (Fortran), 9 
(c-family).

In gcc-13, the OpenMP and OpenACC ended up by chance under "General
Improvements", which gcc-14 replicated.

This commit does not touch gcc-13 to avoid breaking links, but it corrects 
the
anchor used in the links to GCC 13 in projects/gomp/.

However, for GCC 14, it moves the OpenMP/OpenACC changes to the language
section.

diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index b4c602a5..6035ae37 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -59,6 +59,75 @@ a work-in-progress.
 
 General Improvements
 
+
+  For offload-device code generated via OpenMP and OpenACC, the math
+  and the Fortran runtime libraries will now automatically be linked,
+  when the user or compiler links them on the host side. Thus, it is no
+  longer required to explicitly pass -lm and/or
+  -lgfortran to the offload-device linker using the https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html#index-foffload-options;
+  >-foffload-options= flag.
+  
+  
+New configure options: --enable-host-pie, to build the
+compiler executables as PIE; and --enable-host-bind-now,
+to link the compiler executables with -Wl,-z,now in order
+to enable additional hardening.
+  
+  
+New option
+https://gcc.gnu.org/onlinedocs/gcc/Instrumentation-Options.html#index-fhardened;>-fhardened,
+an umbrella option that enables a set of hardening flags.
+The options it enables can be displayed using the
+--help=hardened option.
+  
+  
+New option
+https://gcc.gnu.org/onlinedocs/gcc/Instrumentation-Options.html#index-fharden-control-flow-redundancy;>-fharden-control-flow-redundancy,
+to verify, at the end of functions, that the visited basic blocks
+correspond to a legitimate execution path, so as to detect and
+prevent attacks that transfer control into the middle of
+functions.
+  
+  
+New type attribute
+https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-hardbool-type-attribute;>hardbool,
+for C and Ada.  Hardened
+booleans take user-specified representations for true
+and false, presumably with higher hamming distance
+than standard booleans, and get verified at every use, detecting
+memory corruption and some malicious attacks.
+  
+  
+New type attribute
+https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-strub-type-attribute;>strub
+to control stack scrubbing
+properties of functions and variables.  The stack frame used by
+functions marked with the attribute gets zeroed-out upon returning
+or exception escaping.  Scalar variables marked with the attribute
+cause functions contaning or accessing them to get stack scrubbing
+enabled implicitly.
+  
+  
+New option
+https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html#index-finline-stringops;>-finline-stringops,
+to force inline
+expansion of memcmp, memcpy,
+memmove and memset, even when that is
+not an optimization, to avoid relying on library
+implementations.
+  
+  
+
+New function attribute
+https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-null_005fterminated_005fstring_005farg-function-attribute;>
 null_terminated_string_arg(PARAM_IDX)
+for indicating parameters that are expected to be null-terminated
+strings.
+  
+
+
+New Languages and Language specific improvements
+
 
   https://gcc.gnu.org/projects/gomp/;>OpenMP
   
@@ -136,73 +205,7 @@ a work-in-progress.
   acc_memcpy_from_device_async.
   
   
-  For offload-device code generated via OpenMP and OpenACC, the math
-  and the Fortran runtime libraries will now automatically be linked,
-  when the user or compiler links them on the host side. Thus, it is no
-  longer re

Re: [wwwdocs] gcc-14/changes.html (AMD GCN): Mention gfx1036 support

2024-04-15 Thread Tobias Burnus

Richard Biener wrote:

I do wonder whether hot-patching the ELF header from the libgomp plugin
with the actual micro-subarch would be possible to make the driver happy.


For completeness, there is also the possibility to play with an 
environment variable as in HSA_OVERRIDE_GFX_VERSION=9.0.0 or 
HSA_OVERRIDE_GFX_VERSION=11.0.0


Tobias


[wwwdocs] gcc-14/changes.html + projects/gomp/: Fix OpenMP/OpenACC changes section/anchor

2024-04-15 Thread Tobias Burnus
When clicking on the GCC..1x links at 
https://gcc.gnu.org/projects/gomp/#omp5.0 , I noticed that the GCC 13 
and 14 links did not link to the OpenMP changes.


It turned out that in GCC 12 and before (see commit message for 
details), the OpenMP and OpenACC changes are under "New Languages and 
Language-Specific Improvements" – while for GCC 13 and 14 they are under 
"General Improvements"


Example: GCC 12 – https://gcc.gnu.org/gcc-12/changes.html#languages 
(directly under  and before the first  entry ["Ada"]).


GCC 13: https://gcc.gnu.org/gcc-13/changes.html#general

The attached patch keeps GCC 13 for backward compatibility but moves 
them for GCC 14 "back" to languages.


To fix the links at projects/gomp/, it therefore updates the page 
anchors to 'general'.


* * *

Comments or remarks?

Tobias
gcc-14/changes.html + projects/gomp/: Fix OpenMP/OpenACC changes section/anchor

In earlier release notes, OpenMP and OpenACC changes were under "New
Languages and Language specific improvements", either directly under that
section as in 4.2, 4.4, 4.7, 4.9, 5, 6 (+ c-family + Fortran), 10, 11, and 12
or under a subsection in 4.5 (Fortran), 4.8 (C++), 7 (Fortran), 9 (c-family).

In gcc-13, the OpenMP and OpenACC ended up by chance under "General
Improvements", which gcc-14 replicated.

This commit does not touch gcc-13 to avoid breaking links, but it corrects the
anchor used in the links to GCC 13 in projects/gomp/.

However, for GCC 14, it moves the OpenMP/OpenACC changes to the language
section.

 htdocs/gcc-14/changes.html  | 135 
 htdocs/projects/gomp/index.html |  44 ++---
 2 files changed, 91 insertions(+), 88 deletions(-)

diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index b4c602a5..6035ae37 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -59,6 +59,75 @@ a work-in-progress.
 
 General Improvements
 
+
+  For offload-device code generated via OpenMP and OpenACC, the math
+  and the Fortran runtime libraries will now automatically be linked,
+  when the user or compiler links them on the host side. Thus, it is no
+  longer required to explicitly pass -lm and/or
+  -lgfortran to the offload-device linker using the https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html#index-foffload-options;
+  >-foffload-options= flag.
+  
+  
+New configure options: --enable-host-pie, to build the
+compiler executables as PIE; and --enable-host-bind-now,
+to link the compiler executables with -Wl,-z,now in order
+to enable additional hardening.
+  
+  
+New option
+https://gcc.gnu.org/onlinedocs/gcc/Instrumentation-Options.html#index-fhardened;>-fhardened,
+an umbrella option that enables a set of hardening flags.
+The options it enables can be displayed using the
+--help=hardened option.
+  
+  
+New option
+https://gcc.gnu.org/onlinedocs/gcc/Instrumentation-Options.html#index-fharden-control-flow-redundancy;>-fharden-control-flow-redundancy,
+to verify, at the end of functions, that the visited basic blocks
+correspond to a legitimate execution path, so as to detect and
+prevent attacks that transfer control into the middle of
+functions.
+  
+  
+New type attribute
+https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-hardbool-type-attribute;>hardbool,
+for C and Ada.  Hardened
+booleans take user-specified representations for true
+and false, presumably with higher hamming distance
+than standard booleans, and get verified at every use, detecting
+memory corruption and some malicious attacks.
+  
+  
+New type attribute
+https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-strub-type-attribute;>strub
+to control stack scrubbing
+properties of functions and variables.  The stack frame used by
+functions marked with the attribute gets zeroed-out upon returning
+or exception escaping.  Scalar variables marked with the attribute
+cause functions contaning or accessing them to get stack scrubbing
+enabled implicitly.
+  
+  
+New option
+https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html#index-finline-stringops;>-finline-stringops,
+to force inline
+expansion of memcmp, memcpy,
+memmove and memset, even when that is
+not an optimization, to avoid relying on library
+implementations.
+  
+  
+
+New function attribute
+https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-null_005fterminated_005fstring_005farg-function-attribute;> null_terminated_string_arg(PARAM_IDX)
+for indicating parameters that are expected to be null-terminated
+strings.
+  
+
+
+New Languages and Language specific improvements
+
 
   https://gcc.gnu.org/projects/gomp/;>OpenMP
   
@@ -136,73 +205,7 @@ a work-in-progress.
   acc_memcpy_from_device_async.
   
   
-  For offload-device code 

gcc-wwwdocs branch master updated. c5e08294215518f00e9762cebe3d6f46f1f00526

2024-04-15 Thread Tobias Burnus via Gcc-cvs-wwwdocs
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gcc-wwwdocs".

The branch, master has been updated
   via  c5e08294215518f00e9762cebe3d6f46f1f00526 (commit)
  from  d18a80a52a7ec2edd7ef9a583d8920d61c0b48e5 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -
commit c5e08294215518f00e9762cebe3d6f46f1f00526
Author: Tobias Burnus 
Date:   Mon Apr 15 13:16:36 2024 +0200

gcc-14/changes.html (AMD GCN): Mention gfx1036 support

diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index 8ac08e9a..b4c602a5 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -623,8 +623,9 @@ a work-in-progress.
 AMD Radeon (GCN)
 
 
-  Initial support for the AMD Radeon gfx1030 (RDNA2),
-gfx1100 and gfx1103 (RDNA3) devices has been
+  Initial support for the AMD Radeon gfx1030,
+gfx1036 (RDNA2), gfx1100 and
+gfx1103 (RDNA3) devices has been
 added. LLVM 15+ (assembler and linker) is https://gcc.gnu.org/install/specific.html#amdgcn-x-amdhsa;>required
 to support GFX11.

---

Summary of changes:
 htdocs/gcc-14/changes.html | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)


hooks/post-receive
-- 
gcc-wwwdocs


[wwwdocs] gcc-14/changes.html (AMD GCN): Mention gfx1036 support

2024-04-15 Thread Tobias Burnus
I experimented with some variants to make clearer that each of RDNA2 and 
RDNA3 applies to two card types, but in the end I settled on the 
fewest-word version.


Comments, remarks, suggestions? (To this change or in general?)

Current version: https://gcc.gnu.org/gcc-14/changes.html#amdgcn

Compiler flags, listing the gfx* cards: 
https://gcc.gnu.org/onlinedocs/gcc/AMD-GCN-Options.html


Tobias

PS: On the compiler side, I am looking forward to a .def file which 
reduces the number of files to change when adding a new gfx* card, given 
that we have doubled the number of entries. [Well, 1 missing but I know 
of one WIP addition.]
gcc-14/changes.html (AMD GCN): Mention gfx1036 support

diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index 8ac08e9a..b4c602a5 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -623,8 +623,9 @@ a work-in-progress.
 AMD Radeon (GCN)
 
 
-  Initial support for the AMD Radeon gfx1030 (RDNA2),
-gfx1100 and gfx1103 (RDNA3) devices has been
+  Initial support for the AMD Radeon gfx1030,
+gfx1036 (RDNA2), gfx1100 and
+gfx1103 (RDNA3) devices has been
 added. LLVM 15+ (assembler and linker) is https://gcc.gnu.org/install/specific.html#amdgcn-x-amdhsa;>required
 to support GFX11.


[gcc r14-9843] Fortran: Accept again tab as alternative to space as separator [PR114304]

2024-04-08 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:477c8a82f38e353a8c6313b38197c70b12deea80

commit r14-9843-g477c8a82f38e353a8c6313b38197c70b12deea80
Author: Tobias Burnus 
Date:   Mon Apr 8 21:47:51 2024 +0200

Fortran: Accept again tab as alternative to space as separator [PR114304]

This fixes a side-effect of/regression caused by r14-9822-g93adf88cc6744a,
which was for the same PR.

PR libfortran/114304

libgfortran/ChangeLog:

* io/list_read.c (eat_separator): Accept tab as alternative to 
space.

gcc/testsuite/ChangeLog:

* gfortran.dg/pr114304-2.f90: New test.

Diff:
---
 gcc/testsuite/gfortran.dg/pr114304-2.f90 | 82 
 libgfortran/io/list_read.c   |  2 +-
 2 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gfortran.dg/pr114304-2.f90 
b/gcc/testsuite/gfortran.dg/pr114304-2.f90
new file mode 100644
index 000..5ef5874f528
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr114304-2.f90
@@ -0,0 +1,82 @@
+! { dg-do run }
+!
+! PR fortran/114304
+!
+! Ensure that '\t' (tab) is supported as separator in list-directed input
+! While not really standard conform, this is widely used in user input and
+! widely supported.
+!
+
+use iso_c_binding
+implicit none
+character(len=*,kind=c_char), parameter :: tab = C_HORIZONTAL_TAB
+
+! Accept '' as variant to ' ' as separator
+! Check that  and  are handled
+
+character(len=*,kind=c_char), parameter :: nml_str &
+   = ''//C_CARRIAGE_RETURN // C_NEW_LINE // &
+ 'first'//tab//'='//tab//' .true.'// C_NEW_LINE // &
+ ' , other'//tab//' ='//tab//'3'//tab//', 2'//tab//'/'
+
+! Check that  is handled,
+
+! Note: For new line, Unix uses \n, Windows \r\n but old Apple systems used 
'\r'
+!
+! Gfortran does not seem to support all \r, but the following is supported
+! since ages, ! which seems to be a gfortran extension as ifort and flang 
don't like it.
+
+character(len=*,kind=c_char), parameter :: nml_str2 &
+   = ''//C_CARRIAGE_RETURN // C_NEW_LINE // &
+ 'first'//C_NEW_LINE//'='//tab//' .true.'// C_CARRIAGE_RETURN // &
+ ' , other'//tab//' ='//tab//'3'//tab//', 2'//tab//'/'
+
+character(len=*,kind=c_char), parameter :: str &
+   = tab//'1'//tab//'2,'//tab//'3'//tab//',4'//tab//','//tab//'5'//tab//'/'
+character(len=*,kind=c_char), parameter :: str2 &
+   = tab//'1'//tab//'2;'//tab//'3'//tab//';4'//tab//';'//tab//'5'//tab//'/'
+logical :: first
+integer :: other(4)
+integer :: ints(6)
+namelist /inparm/ first , other
+
+other = 1
+
+open(99, file="test.inp")
+write(99, '(a)') nml_str
+rewind(99)
+read(99,nml=inparm)
+close(99, status="delete")
+
+if (.not.first .or. any (other /= [3,2,1,1])) stop 1
+
+other = 9
+
+open(99, file="test.inp")
+write(99, '(a)') nml_str2
+rewind(99)
+read(99,nml=inparm)
+close(99, status="delete")
+
+if (.not.first .or. any (other /= [3,2,9,9])) stop 2
+
+ints = 66
+
+open(99, file="test.inp", decimal='point')
+write(99, '(a)') str
+rewind(99)
+read(99,*) ints
+close(99, status="delete")
+
+if (any (ints /= [1,2,3,4,5,66])) stop 3
+
+ints = 77 
+
+open(99, file="test.inp", decimal='comma')
+write(99, '(a)') str2
+rewind(99)
+read(99,*) ints
+close(99, status="delete")
+
+if (any (ints /= [1,2,3,4,5,77])) stop 4
+end
diff --git a/libgfortran/io/list_read.c b/libgfortran/io/list_read.c
index b56f2a4e6d6..5bbbef26c26 100644
--- a/libgfortran/io/list_read.c
+++ b/libgfortran/io/list_read.c
@@ -463,7 +463,7 @@ eat_separator (st_parameter_dt *dtp)
 
   dtp->u.p.comma_flag = 0;
   c = next_char (dtp);
-  if (c == ' ')
+  if (c == ' ' || c == '\t')
 {
   eat_spaces (dtp);
   c = next_char (dtp);


[Patch] Fortran: List-directed read - accept again tab as alternative to space as separator [PR114304] (was: [patch, libgfortran] PR114304 - [13/14 Regression] libgfortran I/O – bogus "Semicolon not a

2024-04-08 Thread Tobias Burnus

Jerry D wrote:

See attached updated patch.


It turned out rather quickly that this patch – committed as 
r14-9822-g93adf88cc6744a – caused regressions.


Namely, real-world code uses tab(s) as separators instead of spaces.

[For instance, PR114304 which contains a named-list input file from SPEC 
CPU 2017; that example uses tabs before the '=' sign, but the issue is 
more generic.]


I think the ISO Fortran standard only permits spaces, but as it feels 
natural and is widely supported, tabs are used and should remain supported.


It is not quite clear how '\r' is or should be handled, but as 
eat_spaces did use it, I thought I would add one testcase using them as 
well.


That test is not affected by my change; it did work before with GCC and 
still does – but it does fail with ifort/ifx/flang. I have not thought 
deeply whether it should be supported or not – and looking at the 
libgfortran source file, it often but (→ testcase) not consistently 
requires that an \n follows the \r.


OK for mainline? [And: When the previous patch gets backported, this 
surely needs to be included as well.]


Tobias
Fortran: Accept again tab as alternative to space as separator [PR114304]

This fixes a side-effect of/regression caused by r14-9822-g93adf88cc6744a,
which was for the same PR.

	PR libfortran/114304

libgfortran/ChangeLog:

	* io/list_read.c (eat_separator): Accept tab as alternative to space.

gcc/testsuite/ChangeLog:

	* gfortran.dg/pr114304-2.f90: New test.

 gcc/testsuite/gfortran.dg/pr114304-2.f90 | 82 
 libgfortran/io/list_read.c   |  2 +-
 2 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gfortran.dg/pr114304-2.f90 b/gcc/testsuite/gfortran.dg/pr114304-2.f90
new file mode 100644
index 000..5ef5874f528
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr114304-2.f90
@@ -0,0 +1,82 @@
+! { dg-do run }
+!
+! PR fortran/114304
+!
+! Ensure that '\t' (tab) is supported as separator in list-directed input
+! While not really standard conform, this is widely used in user input and
+! widely supported.
+!
+
+use iso_c_binding
+implicit none
+character(len=*,kind=c_char), parameter :: tab = C_HORIZONTAL_TAB
+
+! Accept '' as variant to ' ' as separator
+! Check that  and  are handled
+
+character(len=*,kind=c_char), parameter :: nml_str &
+   = ''//C_CARRIAGE_RETURN // C_NEW_LINE // &
+ 'first'//tab//'='//tab//' .true.'// C_NEW_LINE // &
+ ' , other'//tab//' ='//tab//'3'//tab//', 2'//tab//'/'
+
+! Check that  is handled,
+
+! Note: For new line, Unix uses \n, Windows \r\n but old Apple systems used '\r'
+!
+! Gfortran does not seem to support all \r, but the following is supported
+! since ages, ! which seems to be a gfortran extension as ifort and flang don't like it.
+
+character(len=*,kind=c_char), parameter :: nml_str2 &
+   = ''//C_CARRIAGE_RETURN // C_NEW_LINE // &
+ 'first'//C_NEW_LINE//'='//tab//' .true.'// C_CARRIAGE_RETURN // &
+ ' , other'//tab//' ='//tab//'3'//tab//', 2'//tab//'/'
+
+character(len=*,kind=c_char), parameter :: str &
+   = tab//'1'//tab//'2,'//tab//'3'//tab//',4'//tab//','//tab//'5'//tab//'/'
+character(len=*,kind=c_char), parameter :: str2 &
+   = tab//'1'//tab//'2;'//tab//'3'//tab//';4'//tab//';'//tab//'5'//tab//'/'
+logical :: first
+integer :: other(4)
+integer :: ints(6)
+namelist /inparm/ first , other
+
+other = 1
+
+open(99, file="test.inp")
+write(99, '(a)') nml_str
+rewind(99)
+read(99,nml=inparm)
+close(99, status="delete")
+
+if (.not.first .or. any (other /= [3,2,1,1])) stop 1
+
+other = 9
+
+open(99, file="test.inp")
+write(99, '(a)') nml_str2
+rewind(99)
+read(99,nml=inparm)
+close(99, status="delete")
+
+if (.not.first .or. any (other /= [3,2,9,9])) stop 2
+
+ints = 66
+
+open(99, file="test.inp", decimal='point')
+write(99, '(a)') str
+rewind(99)
+read(99,*) ints
+close(99, status="delete")
+
+if (any (ints /= [1,2,3,4,5,66])) stop 3
+
+ints = 77 
+
+open(99, file="test.inp", decimal='comma')
+write(99, '(a)') str2
+rewind(99)
+read(99,*) ints
+close(99, status="delete")
+
+if (any (ints /= [1,2,3,4,5,77])) stop 4
+end
diff --git a/libgfortran/io/list_read.c b/libgfortran/io/list_read.c
index b56f2a4e6d6..5bbbef26c26 100644
--- a/libgfortran/io/list_read.c
+++ b/libgfortran/io/list_read.c
@@ -463,7 +463,7 @@ eat_separator (st_parameter_dt *dtp)
 
   dtp->u.p.comma_flag = 0;
   c = next_char (dtp);
-  if (c == ' ')
+  if (c == ' ' || c == '\t')
 {
   eat_spaces (dtp);
   c = next_char (dtp);


Re: [patch, libgfortran] PR114304 - [13/14 Regression] libgfortran I/O – bogus "Semicolon not allowed as separator with DECIMAL='point'"

2024-04-05 Thread Tobias Burnus

Hi Jerry, hello world,

Jerry D wrote:

On 4/5/24 10:47 AM, Jerry D wrote:

On 4/4/24 2:41 PM, Tobias Burnus wrote:
I think for the current testcases, I like the patch – the question 
is only what's about:

   ',3' as input for 'comma'   (or '.3' as input for 'point')
[...]
But for 'comma': [...]
* GCC with your patch: Same result: ios != 0 and nothing read.

Expected: [...] read-in value is 0.3. [...]



See attached updated patch.
Regressions tested on x86-64. OK for trunk and 13 after a bit.


OK. Thanks for the patch!

Tobias



gcc-wwwdocs branch master updated. 8765e9c73ae14cfad592b8a3885fe1bcc3ff96cd

2024-04-05 Thread Tobias Burnus via Gcc-cvs-wwwdocs
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gcc-wwwdocs".

The branch, master has been updated
   via  8765e9c73ae14cfad592b8a3885fe1bcc3ff96cd (commit)
   via  62e1ccdc5b71b7fa9162c336c0964d13c6fa5c79 (commit)
  from  c9e275660a19c804dd8c591c73cb9b169a9d7573 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -
commit 8765e9c73ae14cfad592b8a3885fe1bcc3ff96cd
Author: Tobias Burnus 
Date:   Fri Apr 5 11:58:56 2024 +0200

gcc-14/changes.html: Mention OpenACC 2.7's 'readonly' modifier

diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index 6ddd2788..2d8968cf 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -121,7 +121,9 @@ a work-in-progress.
   
 OpenACC 2.7: The self clause was added to be used on
   compute constructs and the default clause for data
-  constructs.
+  constructs. Additionally, the readonly modifier is now
+  handled in the copyin clause and cache
+  directive.
 OpenACC 3.2: The following API routines are now available in
   Fortran using the openacc module or the
   openacc_lib.h header file:

commit 62e1ccdc5b71b7fa9162c336c0964d13c6fa5c79
Author: Tobias Burnus 
Date:   Fri Apr 5 11:58:06 2024 +0200

gcc-14/changes.html: Comment out  of empty sections

diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index 1cc68430..6ddd2788 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -748,7 +748,7 @@ __asm (".global __flmap_lock"  "\n\t"
 
 
 
-Operating Systems
+
 
 
 
@@ -994,7 +994,7 @@ it emits:
 
 
 
-Other significant improvements
+
 
 
 

---

Summary of changes:
 htdocs/gcc-14/changes.html | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)


hooks/post-receive
-- 
gcc-wwwdocs


Re: [patch, libgfortran] PR114304 - [13/14 Regression] libgfortran I/O – bogus "Semicolon not allowed as separator with DECIMAL='point'"

2024-04-04 Thread Tobias Burnus

Hi Jerry,

I think for the current testcases, I like the patch – the question is 
only what's about:


  ',3' as input for 'comma'   (or '.3' as input for 'point')

For 'point' – 0.3 is read and ios = 0 (as expected)
But for 'comma':
* GCC 12 reads nothing and has ios = 0.
* GCC 13/mainline has an error (ios != 0 – and reads nothing)
* GCC with your patch: Same result: ios != 0 and nothing read.

Expected: Same as with ','/'comma' – namely: read-in value is 0.3.
→ https://godbolt.org/z/4rc8fz4sT for the full example, which works with 
ifort, ifx and flang


* * *

Can you check and fix this? It looks perfectly valid to me: removing 
the leading '0' in the floating-point numbers '0.3' or '0,3' seems to be 
permitted – and it works for '.' (with 'point') but not for ',' (with 
'comma').


F2023's "13.10.3.1 List-directed input forms" refers to "13.7.2.3.2 F 
editing", which states:


"The standard form of the input field [...] The form of the mantissa is 
an optional sign, followed by a string of one or more digits optionally 
containing a decimal symbol."


The latter does not require a digit before the decimal symbol; and since 
the leading zero is also optional on output, it is surely intended that 
",3" is a valid floating-point number for decimal='comma'.


* * *

I extended the testcase to check for this – see attached diff. All 
'point' work, all 'comma' fail.


Thanks for working on this!

Tobiasdiff --git a/gcc/testsuite/gfortran.dg/pr114304.f90 b/gcc/testsuite/gfortran.dg/pr114304.f90
index 8344a9ea857..2bcf9bc7f57 100644
--- a/gcc/testsuite/gfortran.dg/pr114304.f90
+++ b/gcc/testsuite/gfortran.dg/pr114304.f90
@@ -70,7 +70,25 @@
   call t(.true.,  'point', '4,4 ,', .true.)
   call t(.true.,  'comma', '4;4 ;', .true.)
   call t(.true.,  'point', '4,4 ;', .true.)
+
+  call t2('comma', ',2')
+  call t2('point', '.2')
+  call t2('comma', ',2;')
+  call t2('point', '.2,')
+  call t2('comma', ',2 ,')
+  call t2('point', '.2 .')
 contains
+subroutine t2(dec, testinput)
+  character(*) :: dec, testinput
+  integer ios
+  real :: r
+  r = 42
+  read(testinput,*,decimal=dec,iostat=ios) r
+  if (ios /= 0 .or.  abs(r - 0.2) > epsilon(r)) then
+print '(*(g0))', dec, ', testinput = "',testinput,'"',', r=',r,' ios=',ios
+stop 3 
+  end if
+end
 subroutine t(valid, dec, testinput, isreal)
   logical, value :: valid
   character(len=*) :: dec, testinput


[wwwdocs] gcc-14/changes.html: Comment out of empty sections

2024-04-04 Thread Tobias Burnus
I find it confusing to see multiple  in a row without content. 
Actually, both have  as content, but those are commented out as 
actual news is missing ...


See https://gcc.gnu.org/gcc-14/changes.html and see the last entry at 
the bottom of the page and "Operating Systems" somewhere in between.


Any comments, remarks or suggestions before I commit this?

Tobias
gcc-14/changes.html: Comment out  of empty sections

diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index 1cc68430..6ddd2788 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -748,7 +748,7 @@ __asm (".global __flmap_lock"  "\n\t"
 
 
 
-Operating Systems
+
 
 
 
@@ -994,7 +994,7 @@ it emits:
 
 
 
-Other significant improvements
+
 
 
 


[wwwdocs] gcc-14/changes.html: Mention OpenACC 2.7's 'readonly' modifier

2024-04-04 Thread Tobias Burnus

Minor OpenACC 2.7 update to https://gcc.gnu.org/gcc-14/changes.html#openacc

The 'readonly' modifier is now in (well, since March), albeit more 2.7 
features are in the pipeline...


Comments, remarks, suggestions before I commit it?

Tobias
gcc-14/changes.html: Mention OpenACC 2.7's 'readonly' modifier

diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index 045893cf..58f153ec 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -121,7 +121,9 @@ a work-in-progress.
   
 OpenACC 2.7: The self clause was added to be used on
   compute constructs and the default clause for data
-  constructs.
+  constructs. Additionally, the readonly modifier is now
+  handled in the copyin clause and cache
+  directive.
 OpenACC 3.2: The following API routines are now available in
   Fortran using the openacc module or the
   openacc_lib.h header file:


[wwwdocs,committed] gcc-14/changes.html: Fix HTML syntax

2024-04-04 Thread Tobias Burnus

Found when testing my own change via https://validator.w3.org/nu/#file

Committed as obvious.

Tobias
commit c9e275660a19c804dd8c591c73cb9b169a9d7573
Author: Tobias Burnus 
Date:   Thu Apr 4 22:07:28 2024 +0200

gcc-14/changes.html: Fix HTML syntax

W3.org's HTML checker complained about missing  and
about ... within a ... (or rather: it complained about
the unexpected '').
---
 htdocs/gcc-14/changes.html | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index 045893cf..1cc68430 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -861,7 +861,7 @@ __asm (".global __flmap_lock"  "\n\t"
   
 
 The analyzer now makes use of the function attribute
-https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alloc_005fsize-function-attribute;>alloc_size
+https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alloc_005fsize-function-attribute;>alloc_size
 allowing
 https://gcc.gnu.org/onlinedocs/gcc/Static-Analyzer-Options.html#index-fanalyzer;>-fanalyzer
 to emit
@@ -887,7 +887,7 @@ __asm (".global __flmap_lock"  "\n\t"
   
   
 
-The warning
+  The warning
   https://gcc.gnu.org/onlinedocs/gcc/Static-Analyzer-Options.html#index-Wanalyzer-out-of-bounds;>-Wanalyzer-out-of-bounds
   has been extended so that, where possible, it will emit a text-based
   diagram visualizing the spatial relationship between
@@ -899,9 +899,9 @@ __asm (".global __flmap_lock"  "\n\t"
   whether they overlap, are touching, are close or far apart;
   which one is before or after in memory, the relative sizes involved,
   the direction of the access (read vs write), and, in some cases,
-  the values of data involved.
+  the values of data involved.
 Such "text art" diagrams can be controlled (or suppressed) via a new
-  https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-text-art-charset;>-fdiagnostics-text-art-charset= option.
+  https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-text-art-charset;>-fdiagnostics-text-art-charset= option.
 For example, given the out-of-bounds write in strcat in:
   
 
@@ -953,17 +953,17 @@ it emits:
   
 
 The SARIF output from
-https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-format;>-fdiagnostics-format=
+https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-format;>-fdiagnostics-format=
 now adds indentation and newlines to reflect the logical JSON structure of the data.  The previous compact behavior can be restored via the new option
-https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Message-Formatting-Options.html#index-fno-diagnostics-json-formatting;>-fno-diagnostics-json-formatting.
+https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Message-Formatting-Options.html#index-fno-diagnostics-json-formatting;>-fno-diagnostics-json-formatting.
 This also applies to the older output format named "json".
   
   
 
 If profiling information about the compiler itself is requested via
-https://gcc.gnu.org/onlinedocs/gcc/Developer-Options.html#index-ftime-report;>-ftime-report,
+https://gcc.gnu.org/onlinedocs/gcc/Developer-Options.html#index-ftime-report;>-ftime-report,
 and a SARIF output format is requested via
-https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-format;>-fdiagnostics-format=,
+https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-format;>-fdiagnostics-format=,
 then the timing and memory usage data is now written in JSON form into
 the SARIF output, rather than as plain text to stderr.
   


gcc-wwwdocs branch master updated. c9e275660a19c804dd8c591c73cb9b169a9d7573

2024-04-04 Thread Tobias Burnus via Gcc-cvs-wwwdocs
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gcc-wwwdocs".

The branch, master has been updated
   via  c9e275660a19c804dd8c591c73cb9b169a9d7573 (commit)
  from  6eeeb6a53c2e57e3f02f97da176589cf15877247 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -
commit c9e275660a19c804dd8c591c73cb9b169a9d7573
Author: Tobias Burnus 
Date:   Thu Apr 4 22:07:28 2024 +0200

gcc-14/changes.html: Fix HTML syntax

W3.org's HTML checker complained about missing  and
about ... within a ... (or rather: it complained about
the unexpected '').

diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index 045893cf..1cc68430 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -861,7 +861,7 @@ __asm (".global __flmap_lock"  "\n\t"
   
 
 The analyzer now makes use of the function attribute
-https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alloc_005fsize-function-attribute;>alloc_size
+https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alloc_005fsize-function-attribute;>alloc_size
 allowing
 https://gcc.gnu.org/onlinedocs/gcc/Static-Analyzer-Options.html#index-fanalyzer;>-fanalyzer
 to emit
@@ -887,7 +887,7 @@ __asm (".global __flmap_lock"  "\n\t"
   
   
 
-The warning
+  The warning
   https://gcc.gnu.org/onlinedocs/gcc/Static-Analyzer-Options.html#index-Wanalyzer-out-of-bounds;>-Wanalyzer-out-of-bounds
   has been extended so that, where possible, it will emit a text-based
   diagram visualizing the spatial relationship between
@@ -899,9 +899,9 @@ __asm (".global __flmap_lock"  "\n\t"
   whether they overlap, are touching, are close or far apart;
   which one is before or after in memory, the relative sizes involved,
   the direction of the access (read vs write), and, in some cases,
-  the values of data involved.
+  the values of data involved.
 Such "text art" diagrams can be controlled (or suppressed) via a new
-  https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-text-art-charset;>-fdiagnostics-text-art-charset=
 option.
+  https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-text-art-charset;>-fdiagnostics-text-art-charset=
 option.
 For example, given the out-of-bounds write in strcat in:
   
 
@@ -953,17 +953,17 @@ it emits:
   
 
 The SARIF output from
-https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-format;>-fdiagnostics-format=
+https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-format;>-fdiagnostics-format=
 now adds indentation and newlines to reflect the logical JSON structure of 
the data.  The previous compact behavior can be restored via the new option
-https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Message-Formatting-Options.html#index-fno-diagnostics-json-formatting;>-fno-diagnostics-json-formatting.
+https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Message-Formatting-Options.html#index-fno-diagnostics-json-formatting;>-fno-diagnostics-json-formatting.
 This also applies to the older output format named "json".
   
   
 
 If profiling information about the compiler itself is requested via
-https://gcc.gnu.org/onlinedocs/gcc/Developer-Options.html#index-ftime-report;>-ftime-report,
+https://gcc.gnu.org/onlinedocs/gcc/Developer-Options.html#index-ftime-report;>-ftime-report,
 and a SARIF output format is requested via
-https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-format;>-fdiagnostics-format=,
+https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-format;>-fdiagnostics-format=,
 then the timing and memory usage data is now written in JSON form into
 the SARIF output, rather than as plain text to stderr.
   

---

Summary of changes:
 htdocs/gcc-14/changes.html | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)


hooks/post-receive
-- 
gcc-wwwdocs


[gcc r14-9792] nvptx: In mkoffload.cc, call diagnostic_color_init + gcc_init_libintl

2024-04-04 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:7520a4992c94254016085a461c58c972497c4483

commit r14-9792-g7520a4992c94254016085a461c58c972497c4483
Author: Tobias Burnus 
Date:   Thu Apr 4 21:55:29 2024 +0200

nvptx: In mkoffload.cc, call diagnostic_color_init + gcc_init_libintl

gcc/ChangeLog:

* config/nvptx/mkoffload.cc (main): Call
gcc_init_libintl and diagnostic_color_init.

Diff:
---
 gcc/config/nvptx/mkoffload.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/config/nvptx/mkoffload.cc b/gcc/config/nvptx/mkoffload.cc
index a7fc28cbd3f..503b1abcefd 100644
--- a/gcc/config/nvptx/mkoffload.cc
+++ b/gcc/config/nvptx/mkoffload.cc
@@ -638,7 +638,9 @@ main (int argc, char **argv)
   const char *outname = 0;
 
   progname = tool_name;
+  gcc_init_libintl ();
   diagnostic_initialize (global_dc, 0);
+  diagnostic_color_init (global_dc);
 
   if (atexit (mkoffload_cleanup) != 0)
 fatal_error (input_location, "atexit failed");


gcc-wwwdocs branch master updated. 5355f9e63f8240f6a3753a6f9ae10133d0c34e38

2024-04-04 Thread Tobias Burnus via Gcc-cvs-wwwdocs
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gcc-wwwdocs".

The branch, master has been updated
   via  5355f9e63f8240f6a3753a6f9ae10133d0c34e38 (commit)
  from  501aef9bacc3842d0b7d022a4333c9d71d419d4d (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -
commit 5355f9e63f8240f6a3753a6f9ae10133d0c34e38
Author: Tobias Burnus 
Date:   Thu Apr 4 12:22:12 2024 +0200

projects/gomp/: Update 5.2 (fix misplaced GCC 14) and TR12 (new items) 
status

diff --git a/htdocs/projects/gomp/index.html b/htdocs/projects/gomp/index.html
index b8f11508..798efb21 100644
--- a/htdocs/projects/gomp/index.html
+++ b/htdocs/projects/gomp/index.html
@@ -846,7 +846,7 @@ than listed, depending on resolved corner cases and 
optimizations.
   
   
 declare mapper with iterator and present 
modifiers
-GCC14
+No
 
   
   
@@ -871,7 +871,7 @@ than listed, depending on resolved corner cases and 
optimizations.
   
   
 New allocators directive for Fortran
-No
+GCC14
 
   
   
@@ -1225,9 +1225,9 @@ error.
 
   
   
-coexecute directive for Fortran
+workdistribute directive for Fortran
 No
-
+Renamed just after TR12; added in TR12 as coexecute
   
   
 Fortran DO CONCURRENT as associated loop in a loop
@@ -1295,6 +1295,11 @@ error.
 No
 
   
+  
+Canonical loop nest enclosed in (multiple) curly braces (C/C++) or 
BLOCK constructs (Fortran)
+No
+
+  
   
 Relaxed Fortran restrictions to the aligned clause
 No

---

Summary of changes:
 htdocs/projects/gomp/index.html | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)


hooks/post-receive
-- 
gcc-wwwdocs


[gcc r14-9785] libgomp.texi: Update entries in OpenMP TR12 implementation status

2024-04-04 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:1c89d1b9dc8c4957a4ec3674f691595641fd279b

commit r14-9785-g1c89d1b9dc8c4957a4ec3674f691595641fd279b
Author: Tobias Burnus 
Date:   Thu Apr 4 12:20:48 2024 +0200

libgomp.texi: Update entries in OpenMP TR12 implementation status

libgomp/ChangeLog:

* libgomp.texi (TR12): Honor post-TR12 directive name change; add
item about curly braces/BLOCK permitted in canonical loop nests.

Diff:
---
 libgomp/libgomp.texi | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 1ae0f01ccdc..71d62105a20 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -515,7 +515,8 @@ Technical Report (TR) 12 is the second preview for OpenMP 
6.0.
 @item @code{strict} modifier keyword to @code{num_threads} @tab N @tab
 @item @code{atomic} permitted in a construct with @code{order(concurrent)}
   @tab N @tab
-@item @code{coexecute} directive for Fortran @tab N @tab
+@item @code{workdistribute} directive for Fortran @tab N
+  @tab Renamed just after TR12; added in TR12 as @code{coexecute}
 @item Fortran DO CONCURRENT as associated loop in a @code{loop} construct
   @tab N @tab
 @item @code{threadset} clause in task-generating constructs @tab N @tab
@@ -539,6 +540,8 @@ Technical Report (TR) 12 is the second preview for OpenMP 
6.0.
 
 @unnumberedsubsec Other new TR 12 features
 @multitable @columnfractions .60 .10 .25
+@item Canonical loop nest enclosed in (multiple) curly braces (C/C++) or BLOCK 
constructs (Fortran)
+  @tab N @tab
 @item Relaxed Fortran restrictions to the @code{aligned} clause @tab N @tab
 @item Mapping lambda captures @tab N @tab
 @item New @code{omp_pause_stop_tool} constant for omp_pause_resource @tab N 
@tab


Re: [patch, libgfortran] PR114304 - [13/14 Regression] libgfortran I/O – bogus "Semicolon not allowed as separator with DECIMAL='point'"

2024-04-04 Thread Tobias Burnus

Hi Jerry,

Jerry D wrote:
The attached log entry and patch (git show) fixes this issue by adding 
logic to handle spaces in eat_separators. One or more spaces by 
themselves are a valid separator. So in this case we look at the 
character following the spaces to see if it is a comma or semicolon.


If so, I change it to the valid separator for the given decimal mode, 
point or comma. This allows the comma or semicolon to be interpreted as 
a null read on the next effective item in the formatted read.


I chose a permissive approach here that allows reads to proceed when the
input line is mal-formed with an incorrect separator as long as there is 
at least one space in front of it.


First: Consider also adding 'PR fortran/105473' to the commit log
as the PRs are closely related, albeit this PR is different-

The patch looks mostly like I would expect, except for decimal='point' 
and a ';' which is *not* preceded by a space.


Thanks for working on it.

Regarding the 'except' case:

* * *

If I try your patch with the testcase of at comment 19,

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114304#c19
→ https://gcc.gnu.org/bugzilla/attachment.cgi?id=57695 ,

I do note that with 'decimal=point', a trailing semicolon is silently
accepted – even if not preceded by a space.

I think such code is invalid – and you could consider to reject it.
Otherwise, the handling all seems to be in line with the Fortran spec.

i.e. for the following string, I had *expected an error*:

 point, isreal =  F , testinput = ";"n=  42  ios=   0
 point, isreal =  F , testinput = "5;"n=   5  ios=   0
 point, isreal =  T , testinput = "8;"r=   8.  ios= 0
 point, isreal =  T , testinput = "3.3;"r=   3.2995  ios= 0
 point, isreal =  T , testinput = "3,3;"r=   3.  ios= 0

while I think the following is OK (i.e. no error is what I expect) due 
to the space before the ';'.


 point, isreal =  F , testinput = "7 ;"n=   7  ios= 0
 point, isreal =  T , testinput = "9 ;"r=   9.  ios= 0
 point, isreal =  T , testinput = "4.4 ;"r=   4.4010  ios=0
 point, isreal =  T , testinput = "9 ;"r=   9.  ios= 0
 point, isreal =  T , testinput = "4,4 ;"r=   4.  ios= 0

* * *

Looking at the other compilers, ifort, ifx and Flang do issue an error 
here. Likewise, g95 seems to yield an error in this case (see below).


I do note that the Lapack testcase that triggered this PR did have such 
a code - but it was then changed because g95 did not like it:


https://github.com/Reference-LAPACK/lapack/commit/64e8a7500d817869e5fcde35afd39af8bc7a8086

In terms of gfortran: until recently, it did accept it (all versions, 
including 13+14); it then rejected it due to the change in PR105473 (GCC 
14/mainline, backported to 13) – but I now think it rightly did so. With 
the current patch, it is accepted again.


* * *

I have attached the modified testcase linked above; consider adding it 
as well. - Changes to the one of the attachment:

- I added a few additional (albeit boring) tests
- I added an expected output + error diagnostic.

The testcase assumes an error for ';' as separator (with 'point'), 
unless there is a space before it.


[If we want to not diagnose this as vendor extension, we really need to 
add a comment to that testcase besides changing valid = .false. to .true.]


Tobias
! { dg-do run }
!
! PR fortran/114304
!
! See also PR fortran/105473
!
! Testing: Does list-directed reading an integer/real allows some non-integer input?
!
! Note: GCC result comments before fix of this PR.

  implicit none
  call t(.true.,  'comma', ';') ! No error shown
  call t(.false., 'point', ';') ! /!\ gfortran: no error, others: error
  call t(.false., 'comma', ',') ! Error shown
  call t(.true.,  'point', ',') ! No error shown
  call t(.false., 'comma', '.') ! Error shown
  call t(.false., 'point', '.') ! Error shown
  call t(.false., 'comma', '5.') ! Error shown
  call t(.false., 'point', '5.') ! gfortran/flang: Error shown, ifort: no error
  call t(.false., 'comma', '5,') ! gfortran: error; others: no error
  call t(.true.,  'point', '5,') ! No error shown
  call t(.true.,  'comma', '5;') ! No error shown
  call t(.false., 'point', '5;') ! /!\ gfortran: no error shown, others: error
  call t(.true.,  'comma', '7 .') ! No error shown
  call t(.true.,  'point', '7 .') ! No error shown
  call t(.true.,  'comma', '7 ,') ! /!\ gfortran: error; others: no error
  call t(.true.,  'point', '7 ,') ! No error shown
  call t(.true.,  'comma', '7 ;') ! No error shown
  call t(.true.,  'point', '7 ;') ! No error shown

!  print *, '---'

  call t(.false., 'comma', '8.', .true.) ! Error shown
  call t(.true.,  'point', '8.', .true.) ! gfortran/flang: Error shown, ifort: no error
  call t(.true.,  'comma', '8,', .true.) ! gfortran: error; others: no error
  call t(.true.,  'point', '8,', .true.) ! No error shown
  call t(.true.,  'comma', '8;', .true.) ! No 

[wwwdocs] projects/gomp/: Update TR12 status - fix misplaced GCC-14; add new items

2024-04-04 Thread Tobias Burnus

TR12 update:
* I misplaced one implemented in GCC 14 in one of the last commits
* Same update as just proposed for libgomp.texi:
  - Renaming of 'coexecute' to 'workdistribute'
(Post TR12 change to avoid confusion with Fortran's co_min,
 co_broadcast, ... intrinsic procedures for coarrays)
  - Add item about { } / BLOCK in canonical loop nests

Comments, suggestions, other remarks before I commit it?

Current 
version:file:///home/tob/repos/gcc-wwwdocs/htdocs/projects/gomp/index.html

Tobias
projects/gomp/: Update TR12 status - fix misplaced GCC-14; add new items

diff --git a/htdocs/projects/gomp/index.html b/htdocs/projects/gomp/index.html
index b8f11508..798efb21 100644
--- a/htdocs/projects/gomp/index.html
+++ b/htdocs/projects/gomp/index.html
@@ -846,7 +846,7 @@ than listed, depending on resolved corner cases and optimizations.
   
   
 declare mapper with iterator and present modifiers
-GCC14
+No
 
   
   
@@ -871,7 +871,7 @@ than listed, depending on resolved corner cases and optimizations.
   
   
 New allocators directive for Fortran
-No
+GCC14
 
   
   
@@ -1225,9 +1225,9 @@ error.
 
   
   
-coexecute directive for Fortran
+workdistribute directive for Fortran
 No
-
+Renamed just after TR12; added in TR12 as coexecute
   
   
 Fortran DO CONCURRENT as associated loop in a loop
@@ -1295,6 +1295,11 @@ error.
 No
 
   
+  
+Canonical loop nest enclosed in (multiple) curly braces (C/C++) or BLOCK constructs (Fortran)
+No
+
+  
   
 Relaxed Fortran restrictions to the aligned clause
 No


[Patch] libgomp.texi: Update entries in OpenMP TR12 implementation status

2024-04-04 Thread Tobias Burnus

Hi all,

this patch updates the OpenMP TR12 status (to-do) items:

(a) 'coexecute', added in TR12, was renamed after TR12 to
'workdistribute'. Reason: Feedback that 'co...' reminds one
of Fortran coarrays and their intrinsic procedures:
co_broadcast, co_max, co_min, co_reduce and co_sum.
→ Honor this in the status but mention the old name, hopefully
  reducing some confusion and ensuring that we don't miss updating
  that entry once OpenMP 6.0 is released next November

(b) Since TR12, canonical loop nest forms can now be enclosed
in { ... } in C/C++ or in BLOCK in Fortran. Add it to ensure
we won't forget implementing this feature.

Comments, remarks, additions before I commit it?

Tobias

PS: There are surely more items I missed when updating the list
for TR12; I will either have a go later in this year or do it
when updating for the final OpenMP 6.0 in/after November this year.
libgomp.texi: Update entries in OpenMP TR12 implementation status

libgomp/ChangeLog:

	* libgomp.texi (TR12): Honor post-TR12 directive name change; add
	item about curly braces/BLOCK permitted in canonical loop nests.

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 1ae0f01ccdc..71d62105a20 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -515,7 +515,8 @@ Technical Report (TR) 12 is the second preview for OpenMP 6.0.
 @item @code{strict} modifier keyword to @code{num_threads} @tab N @tab
 @item @code{atomic} permitted in a construct with @code{order(concurrent)}
   @tab N @tab
-@item @code{coexecute} directive for Fortran @tab N @tab
+@item @code{workdistribute} directive for Fortran @tab N
+  @tab Renamed just after TR12; added in TR12 as @code{coexecute}
 @item Fortran DO CONCURRENT as associated loop in a @code{loop} construct
   @tab N @tab
 @item @code{threadset} clause in task-generating constructs @tab N @tab
@@ -539,6 +540,8 @@ Technical Report (TR) 12 is the second preview for OpenMP 6.0.
 
 @unnumberedsubsec Other new TR 12 features
 @multitable @columnfractions .60 .10 .25
+@item Canonical loop nest enclosed in (multiple) curly braces (C/C++) or BLOCK constructs (Fortran)
+  @tab N @tab
 @item Relaxed Fortran restrictions to the @code{aligned} clause @tab N @tab
 @item Mapping lambda captures @tab N @tab
 @item New @code{omp_pause_stop_tool} constant for omp_pause_resource @tab N @tab


[gcc r14-9774] lto-wrapper.cc: Add offload target name to 'offload_args' suffix

2024-04-03 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:6f91cce9a314cd4bce16fe52a2ffbeb93d59320b

commit r14-9774-g6f91cce9a314cd4bce16fe52a2ffbeb93d59320b
Author: Tobias Burnus 
Date:   Wed Apr 3 15:47:12 2024 +0200

lto-wrapper.cc: Add offload target name to 'offload_args' suffix

lto-wrapper.cc's compile_offload_image calls mkoffload with
an @./a.offload_args argument ('a.' in case of, e.g., 'a.out'). However,
when generating code for both nvptx and gcn, they use the same name
with -save-temps. Hence, this commit adds a <target name> + '.' before
'offload_args' in line with other offload-target-specific files.

gcc/ChangeLog:

* lto-wrapper.cc (compile_offload_image): Prefix 'offload_args'
suffix by the target name.

Diff:
---
 gcc/lto-wrapper.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/lto-wrapper.cc b/gcc/lto-wrapper.cc
index ca53e4b462e..610594cdc2b 100644
--- a/gcc/lto-wrapper.cc
+++ b/gcc/lto-wrapper.cc
@@ -993,7 +993,8 @@ compile_offload_image (const char *target, const char 
*compiler_path,
 
   obstack_ptr_grow (_obstack, NULL);
   argv = XOBFINISH (_obstack, char **);
-  fork_execute (argv[0], argv, true, "offload_args");
+  suffix = concat (target, ".offload_args", NULL);
+  fork_execute (argv[0], argv, true, suffix);
   obstack_free (_obstack, NULL);
 
   free_array_of_ptrs ((void **) paths, n_paths);


[gcc r14-9772] GCN: install.texi update for Newlib change and LLVM 18 release

2024-04-03 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:ce7cb109ff429bcdca03fccfc444b610c6cb528b

commit r14-9772-gce7cb109ff429bcdca03fccfc444b610c6cb528b
Author: Tobias Burnus 
Date:   Wed Apr 3 14:16:41 2024 +0200

GCN: install.texi update for Newlib change and LLVM 18 release

gcc/ChangeLog:

* doc/install.texi (amdgcn-*-amdhsa): Update Newlib recommendation
and update wording for LLVM 18 release.

Diff:
---
 gcc/doc/install.texi | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 269fe7ec870..970b1a67e74 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -3944,7 +3944,9 @@ Instead of GNU Binutils, you will need to install LLVM 
15, or later, and copy
 by specifying a @code{--with-multilib-list=} that does not list @code{gfx1100}
 and @code{gfx1103}.
 
-Use Newlib (4.3.0 or newer; 4.4.0 or later is recommended).
+Use Newlib (4.3.0 or newer; 4.4.0 contains some improvements and git commit
+7dd4eb1db (2025-03-25, post-4.4.0) fixes device console output for GFX10 and
+GFX11 devices).
 
 To run the binaries, install the HSA Runtime from the
 @uref{https://rocm.docs.amd.com/,,ROCm Platform}, and use
@@ -3954,8 +3956,8 @@ on the GPU.
 To enable support for GCN3 Fiji devices (gfx803), GCC has to be configured with
 @option{--with-arch=@code{fiji}} or
 @option{--with-multilib-list=@code{fiji},...}.  Note that support for Fiji
-devices has been removed in ROCm 4.0 and support in LLVM is deprecated and will
-be removed in LLVM 18.
+devices has been removed in ROCm 4.0 and support in LLVM was deprecated and has
+been removed in LLVM 18.
 
 @html
 


[Patch] nvptx: In mkoffload.cc, call diagnostic_color_init + gcc_init_libintl

2024-04-03 Thread Tobias Burnus

Nvptx's mkoffload.cc contains 14 'fatal_error' calls and one 'warning_at' call,
which stand out more clearly (color, bold) when enabling
  diagnostic_color_init
which this patch does. — Additionally, the call to gcc_init_libintl ensures that
the already translated error messages are also shown in translated form.

OK for mainline?

Tobias

PS: Example: 'nvptx mkoffload:' is bold and 'fatal error:' is in red
in English and some language variants.

nvptx mkoffload: fatal error: COLLECT_GCC must be set.
nvptx mkoffload: 致命的エラー: COLLECT_GCC must be set.
nvptx mkoffload: erreur fatale: COLLECT_GCC doit être défini.
nvptx mkoffload: schwerwiegender Fehler: COLLECT_GCC muss gesetzt sein.

(BTW: It looks as if many languages did not translate the error string
itself, e.g. jp or zh or pl or zh_TW/zh_CN or fi or ...)
nvptx: In mkoffload.cc, call diagnostic_color_init + gcc_init_libintl

gcc/ChangeLog:

	* config/nvptx/mkoffload.cc (main): Call
	gcc_init_libintl and diagnostic_color_init.

 gcc/config/nvptx/mkoffload.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/config/nvptx/mkoffload.cc b/gcc/config/nvptx/mkoffload.cc
index a7fc28cbd3f..503b1abcefd 100644
--- a/gcc/config/nvptx/mkoffload.cc
+++ b/gcc/config/nvptx/mkoffload.cc
@@ -638,7 +638,9 @@ main (int argc, char **argv)
   const char *outname = 0;
 
   progname = tool_name;
+  gcc_init_libintl ();
   diagnostic_initialize (global_dc, 0);
+  diagnostic_color_init (global_dc);
 
   if (atexit (mkoffload_cleanup) != 0)
 fatal_error (input_location, "atexit failed");


[Patch] lto-wrapper.cc: Add offload target name to 'offload_args' suffix

2024-04-03 Thread Tobias Burnus

Found when working with -save-temps and looking at 'mkoffload'
with a GCC configured for both nvptx and gcn offloading.

Before (for 'a.out') for mkoffload: a.offload_args; now: a.amdgcn-amdhsa.offload_args
and a.nvptx-none.offload_args

OK for mainline?

Tobias

PS: The code does not free the 'xmalloc'ed memory, but that's also
the case of all/most 'concat' in this file; the concat could also
be skipped when no save_temps is used, in case this optimization
makes sense.
lto-wrapper.cc: Add offload target name to 'offload_args' suffix

lto-wrapper.cc's compile_offload_image calls mkoffload with
an @./a.offload_args argument ('a.' in case of, e.g., 'a.out'). However,
when generating code for both nvptx and gcn, they use the same name
with -save-temps. Hence, this commit adds a <target name> + '.' before
'offload_args' in line with other offload-target-specific files.

gcc/ChangeLog:

	* lto-wrapper.cc (compile_offload_image): Prefix 'offload_args'
	suffix by the target name.

diff --git a/gcc/lto-wrapper.cc b/gcc/lto-wrapper.cc
index ca53e4b462e..610594cdc2b 100644
--- a/gcc/lto-wrapper.cc
+++ b/gcc/lto-wrapper.cc
@@ -993,7 +993,8 @@ compile_offload_image (const char *target, const char *compiler_path,
 
   obstack_ptr_grow (_obstack, NULL);
   argv = XOBFINISH (_obstack, char **);
-  fork_execute (argv[0], argv, true, "offload_args");
+  suffix = concat (target, ".offload_args", NULL);
+  fork_execute (argv[0], argv, true, suffix);
   obstack_free (_obstack, NULL);
 
   free_array_of_ptrs ((void **) paths, n_paths);


[gcc r14-9770] GCN: Fix --with-arch= handling in mkoffload [PR111966]

2024-04-03 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:b2460d621efe740bd95ad41afef6d806ec1bd9c7

commit r14-9770-gb2460d621efe740bd95ad41afef6d806ec1bd9c7
Author: Tobias Burnus 
Date:   Wed Apr 3 12:37:39 2024 +0200

GCN: Fix --with-arch= handling in mkoffload [PR111966]

The default -march= setting used in mkoffload did not reflect the modified
default set by GCC's configure-time --with-arch=, causing issues when
generating debug code.

gcc/ChangeLog:

PR other/111966
* config/gcn/mkoffload.cc (get_arch): New; moved -march= flag
handling from ...
(main): ... here; call it to handle --with-arch config option
and -march= commandline.

Diff:
---
 gcc/config/gcn/mkoffload.cc | 90 -
 1 file changed, 72 insertions(+), 18 deletions(-)

diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index 04356b86195..9a438de331a 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -35,6 +35,8 @@
 #include "gomp-constants.h"
 #include "simple-object.h"
 #include "elf.h"
+#include "configargs.h"  /* For configure_default_options.  */
+#include "multilib.h"  /* For multilib_options.  */
 
 /* These probably won't (all) be in elf.h for a while.  */
 #undef  EM_AMDGPU
@@ -846,6 +848,62 @@ compile_native (const char *infile, const char *outfile, 
const char *compiler,
   obstack_free (_obstack, NULL);
 }
 
+static int
+get_arch (const char *str, const char *with_arch_str)
+{
+  if (strcmp (str, "fiji") == 0)
+return EF_AMDGPU_MACH_AMDGCN_GFX803;
+  else if (strcmp (str, "gfx900") == 0)
+return EF_AMDGPU_MACH_AMDGCN_GFX900;
+  else if (strcmp (str, "gfx906") == 0)
+return EF_AMDGPU_MACH_AMDGCN_GFX906;
+  else if (strcmp (str, "gfx908") == 0)
+return EF_AMDGPU_MACH_AMDGCN_GFX908;
+  else if (strcmp (str, "gfx90a") == 0)
+return EF_AMDGPU_MACH_AMDGCN_GFX90a;
+  else if (strcmp (str, "gfx1030") == 0)
+return EF_AMDGPU_MACH_AMDGCN_GFX1030;
+  else if (strcmp (str, "gfx1036") == 0)
+return EF_AMDGPU_MACH_AMDGCN_GFX1036;
+  else if (strcmp (str, "gfx1100") == 0)
+return EF_AMDGPU_MACH_AMDGCN_GFX1100;
+  else if (strcmp (str, "gfx1103") == 0)
+return EF_AMDGPU_MACH_AMDGCN_GFX1103;
+
+  error ("unrecognized argument in option %<-march=%s%>", str);
+
+  /* The suggestions are based on the configured multilib support; the compiler
+ itself might support more.  */
+  if (multilib_options[0] != '\0')
+{
+  /* Example: "march=gfx900/march=gfx906" */
+  char *args = (char *) alloca (strlen (multilib_options));
+  const char *p = multilib_options, *q = NULL;
+  args[0] = '\0';
+  while (true)
+   {
+ p = strchr (p, '=');
+ if (!p)
+   break;
+ if (q)
+   strcat (args, ", ");
+ ++p;
+ q = strchr (p, '/');
+ if (q)
+   strncat (args, p, q-p);
+ else
+   strcat (args, p);
+   }
+  inform (UNKNOWN_LOCATION, "valid arguments to %<-march=%> are: %s", 
args);
+}
+  else if (with_arch_str)
+inform (UNKNOWN_LOCATION, "valid argument to %<-march=%> is %qs", 
with_arch_str);
+
+  exit (FATAL_EXIT_CODE);
+
+  return 0;
+}
+
 int
 main (int argc, char **argv)
 {
@@ -853,9 +911,21 @@ main (int argc, char **argv)
   FILE *out = stdout;
   FILE *cfile = stdout;
   const char *outname = 0;
+  const char *with_arch_str = NULL;
 
   progname = tool_name;
+  gcc_init_libintl ();
   diagnostic_initialize (global_dc, 0);
+  diagnostic_color_init (global_dc);
+
+  for (size_t i = 0; i < ARRAY_SIZE (configure_default_options); i++)
+if (configure_default_options[i].name != NULL
+   && strcmp (configure_default_options[i].name, "arch") == 0)
+  {
+   with_arch_str = configure_default_options[0].value;
+   elf_arch = get_arch (configure_default_options[0].value, NULL);
+   break;
+  }
 
   obstack_init (_to_cleanup);
   if (atexit (mkoffload_cleanup) != 0)
@@ -961,24 +1031,8 @@ main (int argc, char **argv)
   else if (strcmp (argv[i], "-dumpbase") == 0
   && i + 1 < argc)
dumppfx = argv[++i];
-  else if (strcmp (argv[i], "-march=fiji") == 0)
-   elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803;
-  else if (strcmp (argv[i], "-march=gfx900") == 0)
-   elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX900;
-  else if (strcmp (argv[i], "-march=gfx906") == 0)
-   elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX906;
-  else if (strcmp (argv[i], "-march=gfx908") == 0)
-   elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX908;
-  else if (strcmp (argv[i], "-march=gfx90a") == 0)
-   elf_arch = EF_AMDGPU_MACH_AM

Re: [Patch] GCN: install.texi update for Newlib change and LLVM 18 release

2024-04-03 Thread Tobias Burnus

Hi Jakub, hello world

Jakub Jelinek wrote:

On Wed, Apr 03, 2024 at 11:09:19AM +0200, Tobias Burnus wrote:

@@ -3954,8 +3956,8 @@ on the GPU.
  To enable support for GCN3 Fiji devices (gfx803), GCC has to be configured 
with
  @option{--with-arch=@code{fiji}} or
  @option{--with-multilib-list=@code{fiji},...}.  Note that support for Fiji 
[...]
+devices has been removed in ROCm 4.0 and support in LLVM is deprecated and has
+been removed in LLVM 18.

Shouldn't we at configure time then detect the case where fiji can't be
supported and either error if it is included explicitly in multilib list, or
implicitly take it out from that list and arrange error to be emitted when
using -march=fiji/gfx803 ?


I am not sure that it is really needed for the reasons given below.
And while it would help some specific use (having LLVM 17 and wanting to use 
Fiji),
it will also cause some confusion as GCC 14 will magically behave differently
depending on how it was built.

Additionally:

* I bet most use gcc/config.gcc which works in most cases just fine
  (LLVM >= 17; enabling all but Fiji)

* Fiji itself is old – removed from recent ROCm and LLVM >= 18,
  which also implies that it is seen as not seeing a lot of use

While there is no configure-time check, using Fiji with LLVM 18 will
fail with a semi-clear compile-time error when doing the in-tree newlib
build or the libgomp build.
(This shows up by default as issue with LLVM 18 + GCC 12/13;
 see https://gcc.gnu.org/PR114419)

Likewise, it will fail with LLVM < 15 when building gfx1100/gfx1103.

* * *

Note: The compiler itself is perfectly happy to handle fiji and gfx1100 itself,
just the LLVM MC assembler doesn't support one [< 15] or the other [>=LLVM 18].

* * *

For those tracking GCC or caring, the documentation at
  https://gcc.gnu.org/gcc-14/changes.html#amdgcn
and
  https://gcc.gnu.org/install/specific.html#amdgcn-x-amdhsa
provides some gory details.

And it is also mentioned at https://gcc.gnu.org/wiki/Offloading


Tobias



[Patch] GCN: install.texi update for Newlib change and LLVM 18 release

2024-04-03 Thread Tobias Burnus

Update for the GCN Newlib commit 7dd4eb1db "amdgcn: Implement proper locks",
https://sourceware.org/git/?p=newlib-cygwin.git;a=commit;h=7dd4eb1db

And change future to past tense regarding the LLVM 18 release.

OK for mainline?

Thanks,

Tobias
GCN: install.texi update for Newlib change and LLVM 18 release

gcc/ChangeLog:

	* doc/install.texi (amdgcn-*-amdhsa): Update Newlib recommendation
	and update wording for LLVM 18 release.

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 269fe7ec870..022bc32901c 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -3944,7 +3944,9 @@ Instead of GNU Binutils, you will need to install LLVM 15, or later, and copy
 by specifying a @code{--with-multilib-list=} that does not list @code{gfx1100}
 and @code{gfx1103}.
 
-Use Newlib (4.3.0 or newer; 4.4.0 or later is recommended).
+Use Newlib (4.3.0 or newer; 4.4.0 contains some improvements and git commit
+7dd4eb1db (2025-03-25, post-4.4.0) fixes device console output for GFX10 and
+GFX11 devices).
 
 To run the binaries, install the HSA Runtime from the
 @uref{https://rocm.docs.amd.com/,,ROCm Platform}, and use
@@ -3954,8 +3956,8 @@ on the GPU.
 To enable support for GCN3 Fiji devices (gfx803), GCC has to be configured with
 @option{--with-arch=@code{fiji}} or
 @option{--with-multilib-list=@code{fiji},...}.  Note that support for Fiji
-devices has been removed in ROCm 4.0 and support in LLVM is deprecated and will
-be removed in LLVM 18.
+devices has been removed in ROCm 4.0 and support in LLVM is deprecated and has
+been removed in LLVM 18.
 
 @html
 


[Patch] GCN: Fix --with-arch= handling in mkoffload [PR111966]

2024-04-03 Thread Tobias Burnus

This patch handles --with-arch= in GCN's mkoffload.cc

While mkoffload mostly does not know this and passes it through to the GCN lto1 
compiler,
it writes an .o file with debug information - and here the -march= in the ELF 
flags must
agree with the one in the other files. Hence, it uses now the --with-arch= 
config argument.

Doing so, there is now a diagnostic if the -march= or --with-arch= is unknown. 
While the
latter should be rejected at GCC compile time, the former was not diagnosed in 
mkoffload
but only later in GCN's compiler.

But as there is now a fatal_error in mkoffload, which comes before the 
GCN-compiler call,
the 'note:' about which devices are available was lost. This has been reinstated by 
using
the multilib settings. (That's not identical to the compiler-supported flags, 
but the output
is reasonable, arguably better or worse than lto1.)

Advantage: The output is less cluttered than a later fail.

To make mkoffload errors - and especially this one - more useful, it now also 
initializes
the colorization / bold.

OK for mainline?

* * *

Example error:

gcn mkoffload: error: unrecognized argument in option '-march=gfx'
gcn mkoffload: note: valid arguments to '-march=' are: gfx906, gfx908, gfx90a, 
gfx1030, gfx1036, gfx1100, gfx1103

where on my TERM=xterm-256color,  'gcn mkoffload:' and the quoted texts are in 
bold,
'error:' is red and 'note:' is cyan.

Compared to cc1, the 'note:' lacks 'fiji', the list is separated by ', '
instead of ' ', and cc1 has a "; did you mean 'gfx1100'?".
And the program name is 'gcn mkoffload' instead of 'cc1'.

Tobias

PS: The generated multilib list could be later changed to be based on the 
gcn-.def file;
or we just keep the multiconfig variant of this patch.
GCN: Fix --with-arch= handling in mkoffload [PR111966]

The default -march= setting used in mkoffload did not reflect the modified
default set by GCC's configure-time --with-arch=, causing issues when
generating debug code.

gcc/ChangeLog:

	PR other/111966
	* config/gcn/mkoffload.cc (get_arch): New; moved -march= flag
	handling from ...
	(main): ... here; call it to handle --with-arch config option
	and -march= commandline.

 gcc/config/gcn/mkoffload.cc | 90 -
 1 file changed, 72 insertions(+), 18 deletions(-)

diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index 04356b86195..31266d2099b 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -35,6 +35,8 @@
 #include "gomp-constants.h"
 #include "simple-object.h"
 #include "elf.h"
+#include "configargs.h"  /* For configure_default_options.  */
+#include "multilib.h"  /* For multilib_options.  */
 
 /* These probably won't (all) be in elf.h for a while.  */
 #undef  EM_AMDGPU
@@ -846,6 +848,62 @@ compile_native (const char *infile, const char *outfile, const char *compiler,
   obstack_free (&argv_obstack, NULL);
 }
 
+int
+get_arch (const char *str, const char *with_arch_str)
+{
+  if (strcmp (str, "fiji") == 0)
+return EF_AMDGPU_MACH_AMDGCN_GFX803;
+  else if (strcmp (str, "gfx900") == 0)
+return EF_AMDGPU_MACH_AMDGCN_GFX900;
+  else if (strcmp (str, "gfx906") == 0)
+return EF_AMDGPU_MACH_AMDGCN_GFX906;
+  else if (strcmp (str, "gfx908") == 0)
+return EF_AMDGPU_MACH_AMDGCN_GFX908;
+  else if (strcmp (str, "gfx90a") == 0)
+return EF_AMDGPU_MACH_AMDGCN_GFX90a;
+  else if (strcmp (str, "gfx1030") == 0)
+return EF_AMDGPU_MACH_AMDGCN_GFX1030;
+  else if (strcmp (str, "gfx1036") == 0)
+return EF_AMDGPU_MACH_AMDGCN_GFX1036;
+  else if (strcmp (str, "gfx1100") == 0)
+return EF_AMDGPU_MACH_AMDGCN_GFX1100;
+  else if (strcmp (str, "gfx1103") == 0)
+return EF_AMDGPU_MACH_AMDGCN_GFX1103;
+
+  error ("unrecognized argument in option %<-march=%s%>", str);
+
+  /* The suggestions are based on the configured multilib support; the compiler
+ itself might support more.  */
+  if (multilib_options[0] != '\0')
+{
+  /* Example: "march=gfx900/march=gfx906" */
+  char *args = (char *) alloca (strlen (multilib_options));
+  const char *p = multilib_options, *q = NULL;
+  args[0] = '\0';
+  while (true)
+	{
+	  p = strchr (p, '=');
+	  if (!p)
+	break;
+	  if (q)
+	strcat (args, ", ");
+	  ++p;
+	  q = strchr (p, '/');
+	  if (q)
+	strncat (args, p, q-p);
+	  else
+	strcat (args, p);
+	}
+  inform (UNKNOWN_LOCATION, "valid arguments to %<-march=%> are: %s", args);
+}
+  else if (with_arch_str)
+inform (UNKNOWN_LOCATION, "valid argument to %<-march=%> is %qs", with_arch_str);
+
+  exit (FATAL_EXIT_CODE);
+
+  return 0;
+}
+
 int
 main (int argc, char **argv)
 {
@@ -853,9 +911,21 @@ main (int argc, char **argv)
   FILE *out = stdout;
   FILE *cfile = stdout;
   const char *outname = 0;
+  const char *with_arch_str = NULL;
 
   progname = tool_name;
+  gcc_init_libintl ();
   diagnostic_initialize (global_dc, 0);
+  diagnostic_color_init (global_dc);
+
+  for (size_t i = 0; i < ARRAY_SIZE 

Re: [PATCH] amdgcn: Add gfx1036 target

2024-03-25 Thread Tobias Burnus

Richard Biener wrote:

I'll follow up with the libgomp testing test summary for archival
purposes.  I still see linker errors for testcases using -g
(the ld: ^[[0;31merror: ^[[0mincompatible mach:
/tmp/ccr0oDpD.mkoffload.dbg.o^M kind)


Hmm, odd – can you try compile with -save-temp and look at the relevant 
files with, e.g., readelf -h on the GCN files (e.g. 'readelf -h 
*.xamdgcn-amdhsa.mkoffload.*o') – that should show under "Flags" what 
the program was compiled for.


We did encounter this issue with LLVM 18 and the solution was to explicitly 
set the version both in the compiler via gcc/config/gcn/gcn-hsa.h's


#define ABI_VERSION_SPEC "march=fiji:--amdhsa-code-object-version=3;" \
 "!march=*|march=*:--amdhsa-code-object-version=4"

and for the debugging data in mkoffload.cc's

  ehdr.e_ident[8] = (elf_arch == EF_AMDGPU_MACH_AMDGCN_GFX803
 ? ELFABIVERSION_AMDGPU_HSA_V3
 : ELFABIVERSION_AMDGPU_HSA_V4);

But I fail to see why this doesn't work for you - you should get V4 for 
your gfx1036 target.


Here, ELFABIVERSION_AMDGPU_HSA_V4 is 2 (V1 did not have a number and V2 
started with 0, hence V3 = 1 etc.)


What LLVM version did you use for the assembler (llvm-mc)?

Tobias


Re: [Patch][RFC] GCN: Define ISA archs in gcn-devices.def and use it

2024-03-15 Thread Tobias Burnus

Hi Andrew,

Andrew Stubbs wrote:
This is more-or-less what I was planning to do myself, but as I want 
to include all the other features that get parametrized in gcn.cc, 
gcn.h, gcn-hsa.h, gcn-opts.h, I hadn't got around to it yet. 
Unfortunately, I think the gcn.opt and config.gcc will always need 
manually updating, but if that's all it'll be an improvement.


Well, for .opt see how nvptx does it – it actually generates an .opt file.


I don't like the idea of including AMDGPU_ISA_UNSUPPORTED;


I concur – I was initially thinking of reporting the device name 
("Unsupported %s") but I then realized that the agent returns a string 
while only for GCC generated files (→ eflag) the hexcode is used. Thus, 
I ended up not using it.


Ultimately, I want to replace many of the conditionals like 
"TARGET_CDNA2_PLUS" from the code and replace them with feature flags 
derived from a def file, or at least a header file. We've acquired too 
many places where there are unsearchable conditionals that need 
finding and fixing every time a new device comes along.
I was thinking of having more flags, but those were the only ones 
required for the two files.
I had imagined that this .def file would exist in gcc/config/gcn, but 
you've placed it in libgomp maybe it makes sense to have multiple 
such files if they contain very different data, but I had imagined one 
file and I'm not sure that the compiler definitions live in libgomp.


There is already:

gcc/config/darwin-c.cc:#include "../../libcpp/internal.h"

gcc/config/gcn/gcn-run.cc:#include 
"../../../libgomp/config/gcn/libgomp-gcn.h"


gcc/fortran/cpp.cc:#include "../../libcpp/internal.h"

gcc/fortran/trigd_fe.inc:#include "../../libgfortran/intrinsics/trigd.inc"

But there is also the reverse:

libcpp/lex.cc:#include "../gcc/config/i386/cpuid.h"

libgfortran/libgfortran.h:#include "../gcc/fortran/libgfortran.h"

lto-plugin/lto-plugin.c:#include "../gcc/lto/common.h"

If you add more items, it is probably better to have it under 
gcc/config/gcn/ - and I really prefer a single file for all.


* * *

Talking about feature sets: This would be a bit like LLVM (see below) 
but I think they have a bit too many indirections. But I do concur that 
we need to consolidate the current support – and hopefully make it 
easier to keep adding more GPU support; we seem to have already covered 
a larger chunk :-)


I also did wonder whether we should support, e.g., running a gfx1100 
code (or a gfx11-generic one) on, e.g., a gfx1103 device. Alternatively, 
we could keep the current check which requires an exact match.


BTW: I do note that looking at the feature sets in LLVM that all GFX110x 
GPUs seem to have common silicon bugs: FeatureMSAALoadDstSelBug and 
FeatureMADIntraFwdBug, while 1100 and 1102 additionally have the 
FeatureUserSGPRInit16Bug but 1101 and 1103 don't. — For some reasons, 
FeatureISAVersion11_Generic only consists of two of those bugs (it 
doesn't have FeatureMADIntraFwdBug), which doesn't seem to be that 
consistent. Maybe the workaround has issues elsewhere? If so, a generic 
-march=gfx11 might be not as useful as one might hope for.


* * *

If I look at LLVM's 
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AMDGPU/AMDGPU.td 
,


they first define several features – like 'FeatureUnalignedScratchAccess'.

Then they combine them like in:

def FeatureISAVersion11_Common ... [FeatureGFX11, ... 
FeatureAtomicFaddRtnInsts ...


And then they use those to map them to feature sets like:

def FeatureISAVersion11_0_Common ... 
listconcat(FeatureISAVersion11_Common.Features,

    [FeatureMSAALoadDstSelBug ...

And for gfx1103:

def FeatureISAVersion11_0_3 : FeatureSet<
  !listconcat(FeatureISAVersion11_0_Common.Features,
    [])>;

The mapping to gfx... names then happens in 
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AMDGPU/GCNProcessors.td 
such as:


def : ProcessorModel<"gfx1103", GFX11SpeedModel,
  FeatureISAVersion11_0_3.Features
>;

Or for the generic one, i.e.:

// [gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151]
def : ProcessorModel<"gfx11-generic", GFX11SpeedModel,
  FeatureISAVersion11_Generic.Features

LLVM also has some generic flags like the following in 
https://github.com/llvm/llvm-project/blob/main/llvm/lib/TargetParser/TargetParser.cpp


    {{"gfx1013"},   {"gfx1013"}, GK_GFX1013, 
FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},


I hope that this will give some inspiration – but I assume that at least 
the initial implementation will be much shorter.


Tobias



[Patch][RFC] GCN: Define ISA archs in gcn-devices.def and use it

2024-03-15 Thread Tobias Burnus
Given the large number of AMD GPU ISAs and the number of files which 
have to be adapted, I wonder whether it makes sense to consolidate this 
a bit, especially in the light that we may want to support more in the 
future.


Besides using some macros, I also improved the diagnostic if the object 
code couldn't be recognized (shouldn't happen) or if the GPU is 
unsupported (likely; it now prints the GPU string). I was initially 
thinking of resolving the arch encoded in the eflag to a string, but as 
this is about GCC-generated code, it seemed unlikely to be of much use. 
[It should be so rare that we might also go back to the static string 
instead of outputting the hex value of the eflag.]


Note: I only modified mkoffload.cc and plugin-gcn.c, but with some 
tweaks it could also be used for other files in gcc/config/gcn/.


If you add a new ISA, you still need to update plugin-gcn.c's 
max_isa_vgprs and the xnack/sram-ecc handling in mkoffload.c's main, but 
that should be all for those two files.


Thoughts?

Tobias

PS: I think the patch is fine and builds, but I have not tested it on an 
AMD GPU machine, yet.


PPS: For using for other files, see also in config/nvptx which uses 
nvptx-sm.def to generate several files.
GCN: Define ISA archs in gcn-devices.def and use it

Adding a new GCN ISA requires updating many files, making it more
likely to miss a file; adding the gcn-devices.def file and using
it in config/gcn/mkoffload.cc and libgomp/plugin/plugin-gcn.c
reduces the duplication.

gcc/ChangeLog:

	* config/gcn/mkoffload.cc (EF_AMDGPU_MACH_AMDGCN_...): Replace
	explicit #define by an enum created from gcn-devices.def.
	(main): Use gcn-devices.def definitions for -march=gfx.* string
	parsing.

libgomp/ChangeLog:

	* plugin/gcn-devices.def: New file.
	* plugin/plugin-gcn.c (gcn_..._s): Remove.
	(enum EF_AMDGPU_MACH): Generate EF_AMDGPU_MACH_AMDGCN_...
	using gcn-devices.def.
	(isa_hsa_name, isa_gcc_name, isa_code): Use gcn-devices.def
	to handle the ISAs.
	(max_isa_vgprs): Update used enum name (GFX90a -> GFX90A).
	(isa_matches_agent, GOMP_OFFLOAD_init_device): Be more verbose
	in case of an unsupported ISA.

 gcc/config/gcn/mkoffload.cc|  42 ++-
 libgomp/plugin/gcn-devices.def |  62 ++
 libgomp/plugin/plugin-gcn.c| 118 +++--
 3 files changed, 119 insertions(+), 103 deletions(-)

diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index fe443abba21..081110d7030 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -47,20 +47,14 @@
 #undef  ELFABIVERSION_AMDGPU_HSA_V4
 #define ELFABIVERSION_AMDGPU_HSA_V4 2
 
-#undef  EF_AMDGPU_MACH_AMDGCN_GFX803
-#define EF_AMDGPU_MACH_AMDGCN_GFX803 0x2a
-#undef  EF_AMDGPU_MACH_AMDGCN_GFX900
-#define EF_AMDGPU_MACH_AMDGCN_GFX900 0x2c
-#undef  EF_AMDGPU_MACH_AMDGCN_GFX906
-#define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f
-#undef  EF_AMDGPU_MACH_AMDGCN_GFX908
-#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30
-#undef  EF_AMDGPU_MACH_AMDGCN_GFX90a
-#define EF_AMDGPU_MACH_AMDGCN_GFX90a 0x3f
-#undef  EF_AMDGPU_MACH_AMDGCN_GFX1030
-#define EF_AMDGPU_MACH_AMDGCN_GFX1030 0x36
-#undef  EF_AMDGPU_MACH_AMDGCN_GFX1100
-#define EF_AMDGPU_MACH_AMDGCN_GFX1100 0x41
+/* Use an enum as macros cannot define macros and
+   assume that EF_AMDGPU_MACH_AMDGCN_... is not #defined.  */
+enum {
+#define AMDGPU_ISA(suffix, str, val) \
+ EF_AMDGPU_MACH_AMDGCN_ ## suffix = val,
+#include "../libgomp/plugin/gcn-devices.def"
+#undef AMDGPU_ISA
+};
 
 #define EF_AMDGPU_FEATURE_XNACK_V4	0x300  /* Mask.  */
 #define EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4	0x000
@@ -959,18 +953,12 @@ main (int argc, char **argv)
 	dumppfx = argv[++i];
   else if (strcmp (argv[i], "-march=fiji") == 0)
 	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803;
-  else if (strcmp (argv[i], "-march=gfx900") == 0)
-	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX900;
-  else if (strcmp (argv[i], "-march=gfx906") == 0)
-	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX906;
-  else if (strcmp (argv[i], "-march=gfx908") == 0)
-	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX908;
-  else if (strcmp (argv[i], "-march=gfx90a") == 0)
-	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX90a;
-  else if (strcmp (argv[i], "-march=gfx1030") == 0)
-	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX1030;
-  else if (strcmp (argv[i], "-march=gfx1100") == 0)
-	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX1100;
+#define AMDGPU_ISA(suffix, str, val) \
+  else if (strcmp (argv[i], "-march=" str) == 0) \
+	elf_arch = EF_AMDGPU_MACH_AMDGCN_ ## suffix;
+#include "../libgomp/plugin/gcn-devices.def"
+#undef AMDGPU_ISA
+
 #define STR "-mstack-size="
   else if (startswith (argv[i], STR))
 	gcn_stack_size = atoi (argv[i] + strlen (STR));
@@ -1029,7 +1017,7 @@ main (int argc, char **argv)
   if (TEST_SRAM_ECC_UNSET (elf_flags))
 	SET_SRAM_ECC_ANY (elf_flags);
   break;
-case EF_AMDGPU_MACH_AMDGCN_GFX90a:
+case EF_AMDGPU_MACH_AMDGCN_GFX90A:
   if (TEST_XNACK_UNSET 

Re: OpenACC 2.7: front-end support for readonly modifier: Add basic OpenACC 'declare' testing

2024-03-14 Thread Tobias Burnus

Hi all, hi Thomas & Chung-Lin,

Thomas Schwinge wrote:

But I realized another thing: don't we have to handle the 'readonly'
modifier also in Fortran module files, that is, next to the OpenACC
'declare' 'copyin' handling in 'gcc/fortran/module.cc':
'AB_OACC_DECLARE_COPYIN' etc.?


I bet so; it is not as bad as with the others as it is "only" an 
optimization hint, but it makes sense to make it available.


Note that when you place the 'module' in the same file as the module 
users ('use'), the compiler might know things because they are in the 
same translation unit / file not because it is in the module ...



  Chung-Lin, please check, via test cases.
'gfortran.dg/goacc/routine-module*', for example, should provide some
guidance of how to achieve actual module file use, and then do the same
'scan-tree-dump' as in the current 'readonly' modifier test cases.

...

By means of only emitting a tag
in the module file if the 'readonly' modifier is specified, we should
maintain compatibility with the current 'MOD_VERSION'.


That was the idea: If only new information gets added (if used), older 
compilers still work. This has huge limitations and does not work as 
well as imagined but here it should work: Older .mod will work with new 
compilers, even though the reverse might not be true.


Tobias


Re: OpenACC 2.7: front-end support for readonly modifier: Add basic OpenACC 'declare' testing

2024-03-14 Thread Tobias Burnus

Hi all, hi Thomas & Chung-Lin,

Thomas Schwinge wrote:

But I realized another thing: don't we have to handle the 'readonly'
modifier also in Fortran module files, that is, next to the OpenACC
'declare' 'copyin' handling in 'gcc/fortran/module.cc':
'AB_OACC_DECLARE_COPYIN' etc.?


I bet so; it is not as bad as with the others as it is "only" an
optimization hint, but it makes sense to make it available.

Note that when you place the 'module' in the same file as the module
users ('use'), the compiler might know things because they are in the
same translation unit / file not because it is in the module ...


  Chung-Lin, please check, via test cases.
'gfortran.dg/goacc/routine-module*', for example, should provide some
guidance of how to achieve actual module file use, and then do the same
'scan-tree-dump' as in the current 'readonly' modifier test cases.

...

By means of only emitting a tag
in the module file if the 'readonly' modifier is specified, we should
maintain compatibility with the current 'MOD_VERSION'.


That was the idea: If only new information gets added (if used), older
compilers still work. This has huge limitations and does not work as
well as imagined but here it should work: Older .mod will work with new
compilers, even though the reverse might not be true.

Tobias


Re: [PATCH v2] openmp: Change to using a hashtab to lookup offload target addresses for indirect function calls

2024-03-14 Thread Tobias Burnus

Hi Kwok,

On January 22, 2024, Kwok Cheung Yeung wrote:
There was a bug in the declare-target-indirect-2.c libgomp testcase 
(testing indirect calls in offloaded target regions, spread over 
multiple teams/threads) that due to an errant fallthrough in a switch 
statement resulted in only one indirect function ever getting called:


(When applying, also the 'dg-xfail-run-if' needs to be removed from
libgomp.fortran/declare-target-indirect-2.f90) ...

However, when the missing break statements are added, the testcase 
fails with an invalid memory access. Upon investigation, this is due 
to the use of a splay-tree as the lookup structure for indirect 
addresses, as the splay-tree moves frequently accessed elements closer 
to the root node and so needs locking when used from multiple threads. 
However, this would end up partially serialising all the threads and 
kill performance. I have switched the lookup structure from a splay 
tree to a hashtab instead to avoid locking during lookup.


I have also tidied up the initialisation of the lookup table by 
calling it only from the first thread of the first team, instead of 
redundantly calling it from every thread and only having the first one 
reached do the initialisation. This removes the need for locking 
during initialisation.


LGTM - except for the following, which we need to solve
(as suggested or differently (locking, or ...)) or
declare a nonissue (e.g. because of a thinko of mine).

Thoughts about the following?

* * *

Namely, I wonder whether there will be an issue for

#pragma target nowait
   ...
#pragma target
   ...

Once the kernel is started, the gcn_expand_prologue creates some setup code and then a call to 
gomp_gcn_enter_kernel. Likewise for gcc/config/nvptx/nvptx.cc, where 
nvptx_declare_function_name adds via write_omp_entry a call to 
gomp_nvptx_main. And one of the first tasks there is 'build_indirect_map'. Assume a very simple kernel for the second item (i.e. it is quickly started)

and a very large number of reverse kernels.

Now, I wonder whether it is possible to have a race between the two kernels;
it seems as if that might happen but is extremely unlikely accounting for all
the overhead of launching and the rather small list of reverse offload items.

As it is unlikely, I wonder whether doing the following lock free, opportunistic
approach will be the best solution. Namely, assuming that no other kernel 
updates
the hash, but if that happens by chance, use the one that was created first.
(If we are lucky, the atomic overhead is fully cancelled by using a local
variable in the function but neither should matter much.)

if (!indirect_htab) // or: __atomic_load_n (&indirect_htab, __ATOMIC_RELAXED) ?
{
  htab_t local_indirect_htab = htab_create (num_ind_funcs);
  ...
  htab_t expected = NULL;
  __atomic_compare_exchange_n (&indirect_htab, &expected,
                               local_indirect_htab, false, ...);
  if (expected) // Other kernel was faster, drop our version
    htab_free (local_indirect_htab);
}

On January 29, 2024, Kwok Cheung Yeung wrote:
Can you please also update the comments to talk about hashtab instead 
of splay?
This version has the comments updated and removes a stray 'volatile' 
in the #ifdefed out code.

Thanks,

Tobias



Re: [PATCH, OpenACC 2.7] struct/array reductions for Fortran

2024-03-13 Thread Tobias Burnus

Hi Chung-Lin, hi Thomas, hello world,

some thoughts glancing at the patch.

Chung-Lin Tang wrote:

There are still some shortcomings in the current state, mainly that only explicit-shaped 
arrays can be used (like its C counterpart). Anything else is currently a bit more 
complicated in the middle-end, since the existing reduction code creates an 
"init-op" (literal of initial values) which can't be done when say 
TYPE_MAX_VALUE (TYPE_DOMAIN (array_type)) is not a tree constant. I think we'll be on the 
hook to solve this later, but I think the current state is okay to submit.


I think having some initial support is fine, but it needs an 
understandable and somewhat complete error diagnostic and testcases. 
More to this below.



+  if (!TREE_CONSTANT (min_tree) || !TREE_CONSTANT (max_tree))
+   {
+ error_at (loc, "array in reduction must be of constant size");
+ return error_mark_node;
+   }

Shouldn't this use a sorry_at instead?


+ /* OpenACC current only supports array reductions on explicit-shape
+arrays.  */
+ if ((n->sym->as && n->sym->as->type != AS_EXPLICIT)
+ || n->sym->attr.codimension)
gfc_error ("Array %qs is not permitted in reduction at %L",
   n->sym->name, &n->where);
[Coarray excursion. I am in favor of allowing it for the reasons above, 
but it could be also rejected but I would prefer to have a proper error 
message in that case.]


While coarrays are unspecified, I do not see a reason why a coarray 
shouldn't be permitted here – as long as it is not coindexed. At the 
end, it is just a normal array with some additional properties, which 
make it possible to remotely access it.


Note: For coarray scalars, we have 'sym->as', thus the check should be 
'(n->sym->as && n->sym->as->rank)' to permit scalar coarrays.


* * *

Coarray excursion: A coarray variable exists in multiple processes 
("images", e.g. MPI processes). If 'caf' and 'caf2' are coarrays, then 
'caf = 5' and 'i = caf2' refer to the local variable.


On the other hand, 'caf[n] = 5' or 'i = caf[3,m]' refers to the 'caf' 
variable on image 'n' or '[3,m]', respectively, which implies in general 
some function call to read or set the remote data, unless the memory is 
directly accessible (→ e.g. some offset calculation) and the compiler 
already knows how to handle this.


While a coarray might be allocated in some special memory, it behaves like 
a normal variable as long as one uses the local version (i.e. not coindexed / 
without the image index in brackets).


Assume for the example above, e.g., integer :: caf[*], caf2[3:6, 7:*].

* * *

Thus, in terms of OpenACC or OpenMP, there is no reason to fret about a 
coarray as long as it is not coindexed and as long as OpenMP/OpenACC 
does not interfere with the memory allocation – either directly ('!$omp 
allocators') or indirectly by placing it into special memory (pinned, 
pseudo-unified-shared memory → OG13's -foffload-memory=pinned/unified).


In the meanwhile, OpenMP actually explicitly allows coarrays with few 
exceptions while OpenACC talks about unspecified behavior.


* * *

Back to generic comments:

If I look at the existing code, I see at gfc_match_omp_clause_reduction:


 if (gfc_match_omp_variable_list (" :", &c->lists[list_idx], false, NULL,
                                  &head, openacc, allow_derived)
     != MATCH_YES)


If 'openacc' is true, array sections are permitted - but the code added 
(see quote above) does not handle n->expr at all and only n->sym.


I think there needs to be at least a gfc_error ("Sorry, subarrays/array 
sections not yet handled") [subarray is the OpenACC wording, 'array 
section' is the Fortran one, which might be clearer].


But you could consider to handle at least array elements, i.e. 
n->expr->rank == 0.


Additionally, I think the current error message is completely unhelpful 
given that some arrays are supported but most are not.


I think there should be also some testcases for the not-yet-supported 
case. I think the following will trigger the omp-low.cc 'sorry_at' (or 
currently 'error' - but I think it should be a sorry):


subroutine foo(n)

integer :: n, A(n)

... reduction(+:A)

And most others will trigger in openmp.cc; for those, you should have an 
allocatable/pointer and assumed-shape arrays for the diagnostic testcase 
as well.


* * *

I have not really experimented with the code, but does it handle 
multi-dimensional constant arrays like 'integer :: a(3:6,10,-1:1)' ? — I 
bet it does, at least after handling my example [2] for the C patch [1].


Thanks,

Tobias

[1] https://gcc.gnu.org/pipermail/gcc-patches/2024-January/641669.html

[2] https://gcc.gnu.org/pipermail/gcc-patches/2024-March/647704.html



Re: [PATCH, OpenACC 2.7] Implement reductions for arrays and structs

2024-03-13 Thread Tobias Burnus

Hi Chung-Lin,


https://gcc.gnu.org/pipermail/gcc-patches/2024-January/641669.html

Chung-Lin Tang wrote:

this patch implements reductions for arrays and structs for OpenACC. Following 
the pattern for OpenACC reductions [...]


(Stumbled over while looking at the Fortran patch, but applying to 
C/C++, hence mentioned here; the Fortran patch is at 
https://gcc.gnu.org/pipermail/gcc-patches/2024-February/645205.html )



OpenACC permits array elements and subarrays. I have not checked whether 
array elements are currently rejected or fully supported, but I miss a 
testcase for both array elements (unless there is one already) and array 
sections.


If implemented, I think there should be a working run-time test.
If not supported, there should be a sorry_at error for those.

Note: the parser should handle array sections as OpenMP handles them.

The testcase should cover something like the following:

void f(int n)
{
  int x[5][5]; // Multidimensional array;
  int y[n]; // VLA
  int *z = (int*)malloc(5*5*sizeof(int)); // Allocated array

... reduction(+:x)
... reduction(+:y)

... reduction(+:x[0:5][2:1])  // OK
... reduction(+:x[1:4][2:1])
  // invalid - while contiguous, first dim does not span the whole array
... reduction(+:y[2:2])  // OK
... reduction(+:y[3:])  // OK - same as [3:n-3]
... reduction(+:y[:2])  // OK - same as [0:2]
... reduction(+:z[1:2][1:6])  // OK

And the same where at least one of the const number is replaced by
a variable.

Note: The 'invalid' reduction is fine in terms of being contiguous (last 
dimension contains a single element, hence, the dimension before does 
not need to span the whole extent) - but OpenACC requires all 
dimensions but the last to span the whole range.


See "2.7.1 Data Specification in Data Clauses" for the subarray description.

I think - if known at compile time - there should be also a diagnostic 
if any dimension but the last does not span the whole range.


Thanks,

Tobias


[gcc r14-9446] OpenMP/Fortran: Fix defaultmap(none) issue with dummy procedures [PR114283]

2024-03-13 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:c5037fcee2de438774466e78e46e6ab4df72a7fe

commit r14-9446-gc5037fcee2de438774466e78e46e6ab4df72a7fe
Author: Tobias Burnus 
Date:   Wed Mar 13 09:35:28 2024 +0100

OpenMP/Fortran: Fix defaultmap(none) issue with dummy procedures [PR114283]

Dummy procedures look similar to variables but aren't - neither in Fortran
nor in OpenMP. As the middle end sees PARM_DECLs, mark them as predetermined
firstprivate for mapping (as already done in gfc_omp_predetermined_sharing).

This does not address the issues related to procedure pointers, which are
still discussed on spec level [see PR].

PR fortran/114283

gcc/fortran/ChangeLog:

* trans-openmp.cc (gfc_omp_predetermined_mapping): Map dummy
procedures as firstprivate.

libgomp/ChangeLog:

* testsuite/libgomp.fortran/declare-target-indirect-4.f90: New test.

Diff:
---
 gcc/fortran/trans-openmp.cc|  9 +
 .../libgomp.fortran/declare-target-indirect-4.f90  | 43 ++
 2 files changed, 52 insertions(+)

diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc
index a2bf15665b3..1dba47126ed 100644
--- a/gcc/fortran/trans-openmp.cc
+++ b/gcc/fortran/trans-openmp.cc
@@ -343,6 +343,15 @@ gfc_omp_predetermined_mapping (tree decl)
&& GFC_DECL_SAVED_DESCRIPTOR (decl)))
 return OMP_CLAUSE_DEFAULTMAP_TO;
 
+  /* Dummy procedures aren't considered variables by OpenMP, thus are
+ disallowed in OpenMP clauses.  They are represented as PARM_DECLs
+ in the middle-end, so return OMP_CLAUSE_DEFAULTMAP_FIRSTPRIVATE here
+ to avoid complaining about their uses with defaultmap(none).  */
+  if (TREE_CODE (decl) == PARM_DECL
+  && TREE_CODE (TREE_TYPE (decl)) == POINTER_TYPE
+  && TREE_CODE (TREE_TYPE (TREE_TYPE (decl))) == FUNCTION_TYPE)
+return OMP_CLAUSE_DEFAULTMAP_FIRSTPRIVATE;
+
   /* These are either array or derived parameters, or vtables.  */
   if (VAR_P (decl) && TREE_READONLY (decl)
   && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
diff --git a/libgomp/testsuite/libgomp.fortran/declare-target-indirect-4.f90 
b/libgomp/testsuite/libgomp.fortran/declare-target-indirect-4.f90
new file mode 100644
index 00000000000..43f4295494c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/declare-target-indirect-4.f90
@@ -0,0 +1,43 @@
+! { dg-additional-options "-fdump-tree-gimple" }
+
+! PR fortran/114283
+
+! { dg-final { scan-tree-dump "#pragma omp parallel shared\\(i\\) if\\(0\\) 
default\\(none\\) firstprivate\\(g\\)" "gimple" } }
+! { dg-final { scan-tree-dump "#pragma omp target num_teams\\(-2\\) 
thread_limit\\(0\\) firstprivate\\(h\\) map\\(from:j \\\[len: 4\\\]\\) 
defaultmap\\(none\\)" "gimple" } }
+
+
+module m
+  implicit none (type, external)
+  !$omp declare target indirect enter(f1, f2)
+contains
+  integer function f1 ()
+f1 = 99
+  end
+  integer function f2 ()
+f2 = 89
+  end
+end module m
+
+use m
+implicit none (type, external)
+call sub1(f1)
+call sub2(f2)
+contains
+  subroutine sub1(g)
+procedure(integer) :: g
+integer :: i
+!$omp parallel default(none) if(.false.) shared(i)
+  i = g ()
+!$omp end parallel
+if (i /= 99) stop 1
+  end
+
+  subroutine sub2(h)
+procedure(integer) :: h
+integer :: j
+!$omp target defaultmap(none) map(from:j)
+  j = h ()
+!$omp end target
+if (j /= 89) stop 1
+  end
+end


[committed] libgomp/libgomp.texi: Fix @node order in @menu

2024-03-12 Thread Tobias Burnus

The ordering problem was reported on #gfortran's IRC.

The warning disappears between texinfo 6.7 and 6.8  – and my bet is that 
it has been caused by the texinfo commit


fa1ee0cf35 Do not warn if external node in menu is not consistent with 
sections


which implies that it was done on purpose in texinfo. It clearly wasn't 
done on purpose in GCC, though. Hence:


Committed as obvious.

Tobias
commit ef79c64cb5762c86ee04ddfcedb7fe31eaa3bac8
Author: Tobias Burnus 
Date:   Tue Mar 12 15:42:50 2024 +0100

libgomp/libgomp.texi: Fix @node order in @menu

While texinfo 7.0.3 does not warn, an older texinfo did complain about:
libgomp.texi:1964: warning: node next `omp_target_memcpy' in menu
`omp_target_memcpy_rect' and in sectioning `omp_target_memcpy_async' differ

libgomp/

* libgomp.texi (Device Memory Routines): Swap item order to match
the order of the '@node's of the '@subsection's.

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index bf5c7a76fc9..57165e0e981 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -1783,8 +1783,8 @@ pointers on devices. They have C linkage and do not throw exceptions.
 * omp_target_is_present:: Check whether storage is mapped
 * omp_target_is_accessible:: Check whether memory is device accessible
 * omp_target_memcpy:: Copy data between devices
-* omp_target_memcpy_rect:: Copy a subvolume of data between devices
 * omp_target_memcpy_async:: Copy data between devices asynchronously
+* omp_target_memcpy_rect:: Copy a subvolume of data between devices
 * omp_target_memcpy_rect_async:: Copy a subvolume of data between devices asynchronously
 @c * omp_target_memset:: /TR12
 @c * omp_target_memset_async:: /TR12


[gcc r14-9441] libgomp/libgomp.texi: Fix @node order in @menu

2024-03-12 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:ef79c64cb5762c86ee04ddfcedb7fe31eaa3bac8

commit r14-9441-gef79c64cb5762c86ee04ddfcedb7fe31eaa3bac8
Author: Tobias Burnus 
Date:   Tue Mar 12 15:42:50 2024 +0100

libgomp/libgomp.texi: Fix @node order in @menu

While texinfo 7.0.3 does not warn, an older texinfo did complain about:
libgomp.texi:1964: warning: node next `omp_target_memcpy' in menu
`omp_target_memcpy_rect' and in sectioning `omp_target_memcpy_async' differ

libgomp/

* libgomp.texi (Device Memory Routines): Swap item order to match
the order of the '@node's of the '@subsection's.

Diff:
---
 libgomp/libgomp.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index bf5c7a76fc9..57165e0e981 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -1783,8 +1783,8 @@ pointers on devices. They have C linkage and do not throw exceptions.
 * omp_target_is_present:: Check whether storage is mapped
 * omp_target_is_accessible:: Check whether memory is device accessible
 * omp_target_memcpy:: Copy data between devices
-* omp_target_memcpy_rect:: Copy a subvolume of data between devices
 * omp_target_memcpy_async:: Copy data between devices asynchronously
+* omp_target_memcpy_rect:: Copy a subvolume of data between devices
 * omp_target_memcpy_rect_async:: Copy a subvolume of data between devices asynchronously
 @c * omp_target_memset:: /TR12
 @c * omp_target_memset_async:: /TR12


Re: [Patch] OpenMP/Fortran: Fix defaultmap(none) issue with dummy procedures [PR114283]

2024-03-12 Thread Tobias Burnus

Jakub Jelinek wrote:


So firstprivate clause handling remaps them then if declare target indirect
is used? If so, the patch looks reasonable to me.


[I have now updated the patch, turning the testcase into a run-time test to
ensure that it also keeps working at runtime.]

OpenMP leaves it a bit open when the remapping has to happen,
but one can construct cases – in particular with unified-shared memory –
where it is not possible to do this upon entry to a target region.

Thus, it has to be done when the function is invoked, e.g.

i = (*g) ();

is turned (in the target region but only on the device side) into

i = (*GOMP_target_map_indirect_ptr (g)) ();

Thus, as long as the host pointer value is transferred to the device,
it works – as the lookup is done on the device side. Directly using a
device address (remap when mapping to the target) will also not shorten
the lookup, i.e. there is no need for it.

Does it still look reasonable to you?

Tobias

PS: In the current OpenMP specification, this is mainly described via
the glossary (the newest change is the addition of dummy procedure):

"indirect device invocation – An indirect call to the _device_ version of 
a _procedure_ on a _device_ other than the _host-device_, through a 
function pointer (C/C++), a pointer to a member function (C++), a dummy 
procedure (Fortran), or a procedure pointer (Fortran) that refers to the 
host version of the _procedure_."
OpenMP/Fortran: Fix defaultmap(none) issue with dummy procedures [PR114283]

Dummy procedures look similar to variables but aren't - neither in Fortran
nor in OpenMP. As the middle end sees PARM_DECLs, mark them as predetermined
firstprivate for mapping (as already done in gfc_omp_predetermined_sharing).

This does not address the issues related to procedure pointers, which are
still discussed on spec level [see PR].

	PR fortran/114283

gcc/fortran/ChangeLog:

	* trans-openmp.cc (gfc_omp_predetermined_mapping): Map dummy
	procedures as firstprivate.

libgomp/ChangeLog:

	* testsuite/libgomp.fortran/declare-target-indirect-4.f90: New test.

 gcc/fortran/trans-openmp.cc|  9 +
 .../libgomp.fortran/declare-target-indirect-4.f90  | 43 ++
 2 files changed, 52 insertions(+)

diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc
index a2bf15665b3..1dba47126ed 100644
--- a/gcc/fortran/trans-openmp.cc
+++ b/gcc/fortran/trans-openmp.cc
@@ -343,6 +343,15 @@ gfc_omp_predetermined_mapping (tree decl)
 	&& GFC_DECL_SAVED_DESCRIPTOR (decl)))
 return OMP_CLAUSE_DEFAULTMAP_TO;
 
+  /* Dummy procedures aren't considered variables by OpenMP, thus are
+ disallowed in OpenMP clauses.  They are represented as PARM_DECLs
+ in the middle-end, so return OMP_CLAUSE_DEFAULTMAP_FIRSTPRIVATE here
+ to avoid complaining about their uses with defaultmap(none).  */
+  if (TREE_CODE (decl) == PARM_DECL
+  && TREE_CODE (TREE_TYPE (decl)) == POINTER_TYPE
+  && TREE_CODE (TREE_TYPE (TREE_TYPE (decl))) == FUNCTION_TYPE)
+return OMP_CLAUSE_DEFAULTMAP_FIRSTPRIVATE;
+
   /* These are either array or derived parameters, or vtables.  */
   if (VAR_P (decl) && TREE_READONLY (decl)
   && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
diff --git a/libgomp/testsuite/libgomp.fortran/declare-target-indirect-4.f90 b/libgomp/testsuite/libgomp.fortran/declare-target-indirect-4.f90
new file mode 100644
index 000..43f4295494c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/declare-target-indirect-4.f90
@@ -0,0 +1,43 @@
+! { dg-additional-options "-fdump-tree-gimple" }
+
+! PR fortran/114283
+
+! { dg-final { scan-tree-dump "#pragma omp parallel shared\\(i\\) if\\(0\\) default\\(none\\) firstprivate\\(g\\)" "gimple" } }
+! { dg-final { scan-tree-dump "#pragma omp target num_teams\\(-2\\) thread_limit\\(0\\) firstprivate\\(h\\) map\\(from:j \\\[len: 4\\\]\\) defaultmap\\(none\\)" "gimple" } }
+
+
+module m
+  implicit none (type, external)
+  !$omp declare target indirect enter(f1, f2)
+contains
+  integer function f1 ()
+f1 = 99
+  end
+  integer function f2 ()
+f2 = 89
+  end
+end module m
+
+use m
+implicit none (type, external)
+call sub1(f1)
+call sub2(f2)
+contains
+  subroutine sub1(g)
+procedure(integer) :: g
+integer :: i
+!$omp parallel default(none) if(.false.) shared(i)
+  i = g ()
+!$omp end parallel
+if (i /= 99) stop 1
+  end
+
+  subroutine sub2(h)
+procedure(integer) :: h
+integer :: j
+!$omp target defaultmap(none) map(from:j)
+  j = h ()
+!$omp end target
+if (j /= 89) stop 1
+  end
+end


[Patch] OpenMP/Fortran: Fix defaultmap(none) issue with dummy procedures [PR114283]

2024-03-11 Thread Tobias Burnus

Using dummy procedures in a target region with 'defaultmap(none)' leads to:

  Error: 'g' not specified in enclosing 'target'

and this cannot be fixed by using 'firstprivate' as non-pointer dummy routines
are rejected as "Error: Object 'g' is not a variable".

Fixed by doing the same for mapping as for data sharing: using predetermined
firstprivate.

BTW: Only since GCC 14, 'declare target indirect' makes it possible to
simply use dummy procedures and procedure pointers in a target region.

Comments? Suggestions?

Tobias

PS: Procedure pointers aren't variables either, but they act even more like
variables as they permit changing pointer association such that '(first)private'
vs. 'shared'/'map' can both make sense. — GCC accepts those in (nearly) all clauses,
ifort only in (first)private while flang not at all. The spec is somewhat silent
about it. This is tracked in the same PR (PR114283) and in the specification
issue #3823.
OpenMP/Fortran: Fix defaultmap(none) issue with dummy procedures [PR114283]

Dummy procedures look similar to variables but aren't - neither in Fortran
nor in OpenMP. As the middle end sees PARM_DECLs, mark them as predetermined
firstprivate for mapping (as already done in gfc_omp_predetermined_sharing).

This does not address the issues related to procedure pointers, which are
still discussed on spec level [see PR].

	PR fortran/114283

gcc/fortran/ChangeLog:

	* trans-openmp.cc (gfc_omp_predetermined_mapping): Map dummy
	procedures as firstprivate.

gcc/testsuite/ChangeLog:

	* gfortran.dg/gomp/target4.f90: New test.

 gcc/fortran/trans-openmp.cc|  9 +
 gcc/testsuite/gfortran.dg/gomp/target4.f90 | 18 ++
 2 files changed, 27 insertions(+)

diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc
index a2bf15665b3..1dba47126ed 100644
--- a/gcc/fortran/trans-openmp.cc
+++ b/gcc/fortran/trans-openmp.cc
@@ -343,6 +343,15 @@ gfc_omp_predetermined_mapping (tree decl)
 	&& GFC_DECL_SAVED_DESCRIPTOR (decl)))
 return OMP_CLAUSE_DEFAULTMAP_TO;
 
+  /* Dummy procedures aren't considered variables by OpenMP, thus are
+ disallowed in OpenMP clauses.  They are represented as PARM_DECLs
+ in the middle-end, so return OMP_CLAUSE_DEFAULTMAP_FIRSTPRIVATE here
+ to avoid complaining about their uses with defaultmap(none).  */
+  if (TREE_CODE (decl) == PARM_DECL
+  && TREE_CODE (TREE_TYPE (decl)) == POINTER_TYPE
+  && TREE_CODE (TREE_TYPE (TREE_TYPE (decl))) == FUNCTION_TYPE)
+return OMP_CLAUSE_DEFAULTMAP_FIRSTPRIVATE;
+
   /* These are either array or derived parameters, or vtables.  */
   if (VAR_P (decl) && TREE_READONLY (decl)
   && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
diff --git a/gcc/testsuite/gfortran.dg/gomp/target4.f90 b/gcc/testsuite/gfortran.dg/gomp/target4.f90
new file mode 100644
index 000..09364e707f1
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/gomp/target4.f90
@@ -0,0 +1,18 @@
+! { dg-additional-options "-fdump-tree-gimple" }
+
+! PR fortran/114283
+
+! { dg-final { scan-tree-dump "#pragma omp parallel default\\(none\\) firstprivate\\(g\\)" "gimple" } }
+! { dg-final { scan-tree-dump "#pragma omp target num_teams\\(-2\\) thread_limit\\(0\\) defaultmap\\(none\\) firstprivate\\(g\\)" "gimple" } }
+
+subroutine f(g)
+procedure() :: g
+
+!$omp parallel default(none)
+  call g
+!$omp end parallel
+
+!$omp target defaultmap(none)
+  call g
+!$omp end target
+end


  1   2   3   4   5   6   7   8   9   10   >