Hello world, the attached patch implements a new option, -fexternal-blas64, so people can use 64-bit libraries for external BLAS, like Intel MKL.
Regression-tested. OK for trunk? Best regards Thomas
Implement -fexternal-blas64 option. Libraries like Intel MKL use 64-bit integers in their API, but gfortran up to now only provides external BLAS for matmul with 32-bit integers. This straightforward patch provides a new option -fexternal-blas64 to remedy that situation. gcc/fortran/ChangeLog: * frontend-passes.cc (optimize_namespace): Handle flag_external_blas64. (call_external_blas): If flag_external_blas is set, use gfc_integer_4_kind as the argument kind, gfc_integer_8_kind otherwise. * gfortran.h (gfc_integer_8_kind): Define. * invoke.texi: Document -fexternal-blas64. * lang.opt: Add -fexternal-blas64. * options.cc (gfc_post_options): -fexternal-blas is incompatible with -fexternal-blas64. gcc/testsuite/ChangeLog: * gfortran.dg/matmul_blas_3.f90: New test.
diff --git a/gcc/fortran/frontend-passes.cc b/gcc/fortran/frontend-passes.cc index 4a468b93600..595c5095eaf 100644 --- a/gcc/fortran/frontend-passes.cc +++ b/gcc/fortran/frontend-passes.cc @@ -1481,7 +1481,8 @@ optimize_namespace (gfc_namespace *ns) gfc_code_walker (&ns->code, convert_elseif, dummy_expr_callback, NULL); gfc_code_walker (&ns->code, cfe_code, cfe_expr_0, NULL); gfc_code_walker (&ns->code, optimize_code, optimize_expr, NULL); - if (flag_inline_matmul_limit != 0 || flag_external_blas) + if (flag_inline_matmul_limit != 0 || flag_external_blas + || flag_external_blas64) { bool found; do @@ -1496,7 +1497,7 @@ optimize_namespace (gfc_namespace *ns) NULL); } - if (flag_external_blas) + if (flag_external_blas || flag_external_blas64) gfc_code_walker (&ns->code, call_external_blas, dummy_expr_callback, NULL); @@ -4644,6 +4645,7 @@ call_external_blas (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED, enum matrix_case m_case; bool realloc_c; gfc_code **next_code_point; + int arg_kind; /* Many of the tests for inline matmul also apply here. */ @@ -4929,13 +4931,20 @@ call_external_blas (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED, transb, 1); actual->next = next; + if (flag_external_blas) + arg_kind = gfc_integer_4_kind; + else + { + gcc_assert (flag_external_blas64); + arg_kind = gfc_integer_8_kind; + } + c1 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (a->expr), 1, - gfc_integer_4_kind); + arg_kind); c2 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (b->expr), 2, - gfc_integer_4_kind); - + arg_kind); b1 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (b->expr), 1, - gfc_integer_4_kind); + arg_kind); /* Argument M. */ actual = next; @@ -4975,7 +4984,7 @@ call_external_blas (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED, actual = next; next = gfc_get_actual_arglist (); next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (matrix_a), - 1, gfc_integer_4_kind); + 1, arg_kind); actual->next = next; /* Argument B. 
*/ @@ -4988,7 +4997,7 @@ call_external_blas (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED, actual = next; next = gfc_get_actual_arglist (); next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (matrix_b), - 1, gfc_integer_4_kind); + 1, arg_kind); actual->next = next; /* Argument BETA - set to zero. */ @@ -5012,7 +5021,7 @@ call_external_blas (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED, actual = next; next = gfc_get_actual_arglist (); next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (expr1), - 1, gfc_integer_4_kind); + 1, arg_kind); actual->next = next; return 0; diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 2e6b368b4c2..74fcd1ad9de 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -3672,6 +3672,8 @@ extern int gfc_character_storage_size; #define gfc_integer_4_kind 4 #define gfc_real_4_kind 4 +#define gfc_integer_8_kind 8 + /* symbol.cc */ void gfc_clear_new_implicit (void); bool gfc_add_new_implicit_range (int, int); diff --git a/gcc/fortran/invoke.texi b/gcc/fortran/invoke.texi index 0b893e876a5..d62ee819997 100644 --- a/gcc/fortran/invoke.texi +++ b/gcc/fortran/invoke.texi @@ -189,7 +189,7 @@ and warnings}. -fbounds-check -ftail-call-workaround -ftail-call-workaround=@var{n} -fcheck-array-temporaries -fcheck=<all|array-temps|bits|bounds|do|mem|pointer|recursion> --fcoarray=<none|single|lib> -fexternal-blas -ff2c +-fcoarray=<none|single|lib> -fexternal-blas -fexternal-blas64 -ff2c -ffrontend-loop-interchange -ffrontend-optimize -finit-character=@var{n} -finit-integer=@var{n} -finit-local-zero -finit-derived -finit-logical=<true|false> @@ -2014,13 +2014,26 @@ for some matrix operations like @code{MATMUL}, instead of using our own algorithms, if the size of the matrices involved is larger than a given limit (see @option{-fblas-matmul-limit}). This may be profitable if an optimized vendor BLAS library is available. The BLAS library has -to be specified at link time. 
+to be specified at link time. This option specifies a BLAS library +with integer arguments of default kind (32 bits). It cannot be used +together with @option{-fexternal-blas64}. + +@opindex fexternal-blas64 +@item -fexternal-blas64 +This option makes @command{gfortran} generate calls to BLAS functions +for some matrix operations like @code{MATMUL}, instead of using our own +algorithms, if the size of the matrices involved is larger than a given +limit (see @option{-fblas-matmul-limit}). This may be profitable if an +optimized vendor BLAS library is available. The BLAS library has +to be specified at link time. This option specifies a BLAS library +with integer arguments of @code{KIND=8} (64 bits). It cannot be used +together with @option{-fexternal-blas}. @opindex fblas-matmul-limit @item -fblas-matmul-limit=@var{n} -Only significant when @option{-fexternal-blas} is in effect. -Matrix multiplication of matrices with size larger than (or equal to) @var{n} -is performed by calls to BLAS functions, while others are +Only significant when @option{-fexternal-blas} or @option{-fexternal-blas64} +is in effect. Matrix multiplication of matrices with size larger than or equal +to @var{n} is performed by calls to BLAS functions, while others are handled by @command{gfortran} internal algorithms. If the matrices involved are not square, the size comparison is performed using the geometric mean of the dimensions of the argument and result matrices. diff --git a/gcc/fortran/lang.opt b/gcc/fortran/lang.opt index 7826a1ab5fa..33710d0d920 100644 --- a/gcc/fortran/lang.opt +++ b/gcc/fortran/lang.opt @@ -566,6 +566,10 @@ fexternal-blas Fortran Var(flag_external_blas) Specify that an external BLAS library should be used for matmul calls on large-size arrays. +fexternal-blas64 +Fortran Var(flag_external_blas64) +Use an external BLAS library with 64-bit indexing for matmul on large-size arrays. + ff2c Fortran Var(flag_f2c) Use f2c calling convention. 
diff --git a/gcc/fortran/options.cc b/gcc/fortran/options.cc index 821a8c88bbb..35c1924a9c9 100644 --- a/gcc/fortran/options.cc +++ b/gcc/fortran/options.cc @@ -504,7 +504,12 @@ gfc_post_options (const char **pfilename) flag_inline_matmul_limit = 30; } - /* Optimization implies front end optimization, unless the user + /* We can only have a 32-bit or a 64-bit version of BLAS, not both. */ + + if (flag_external_blas && flag_external_blas64) + gfc_fatal_error ("32- and 64-bit version of BLAS cannot both be specified"); + + /* Optimization implies front end optimization, unless the user specified it directly. */ if (flag_frontend_optimize == -1) diff --git a/gcc/testsuite/gfortran.dg/matmul_blas_3.f90 b/gcc/testsuite/gfortran.dg/matmul_blas_3.f90 new file mode 100644 index 00000000000..d496596b5da --- /dev/null +++ b/gcc/testsuite/gfortran.dg/matmul_blas_3.f90 @@ -0,0 +1,12 @@ +! { dg-do compile } +! { dg-options "-ffrontend-optimize -fexternal-blas64 -fdump-tree-original" } +! PR 121161 - option for 64-bit BLAS for MATMUL. +! Check this by making sure there is no KIND=4 integer. +subroutine foo(a,b,c,n) + implicit none + integer(kind=8) :: n + real, dimension(n,n) :: a, b, c + c = matmul(a,b) +end subroutine foo +! { dg-final { scan-tree-dump-not "integer\\(kind=4\\)" "original" } } +! { dg-final { scan-tree-dump-times "sgemm" 1 "original" } }