>> cvt_type >> - = build_nonstandard_integer_type (GET_MODE_BITSIZE (imode), >> + = build_nonstandard_integer_type (GET_MODE_BITSIZE >> + (intermediate_mode), >> 0); > > the 0); now fits on the previous line? > > Otherwise looks OK to me.
Thanks, I adjusted the things you remarked including the use of int_mode_for_size. Another thing I changed is continuing instead of breaking when the current intermediate mode cannot hold the range so it still has a chance to fit in the next larger one. Bootstrap and testsuite are unchanged on x86, aarch64 and power and I'm going to commit the attached barring further remarks. Regards Robin >From cabfa07256eafec4485304fe7639d8fd7512cf11 Mon Sep 17 00:00:00 2001 From: Robin Dapp <rd...@ventanamicro.com> Date: Thu, 13 Jul 2023 09:10:06 +0200 Subject: [PATCH v3] vect: Handle demoting FLOAT and promoting FIX_TRUNC. The recent changes that allowed multi-step conversions for "non-packing/unpacking", i.e. modifier == NONE targets included promoting to-float and demoting to-int variants. This patch adds the missing demoting to-float and promoting to-int handling. gcc/ChangeLog: * tree-vect-stmts.cc (vectorizable_conversion): Handle more demotion/promotion for modifier == NONE. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/conversions/vec-narrow-int64-float16.c: New test. * gcc.target/riscv/rvv/autovec/conversions/vec-widen-float16-int64.c: New test. --- .../conversions/vec-narrow-int64-float16.c | 12 ++++ .../conversions/vec-widen-float16-int64.c | 12 ++++ gcc/tree-vect-stmts.cc | 69 ++++++++++++++----- 3 files changed, 76 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vec-narrow-int64-float16.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vec-widen-float16-int64.c diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vec-narrow-int64-float16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vec-narrow-int64-float16.c new file mode 100644 index 00000000000..ebee1cfa888 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vec-narrow-int64-float16.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=scalable" } */ + +#include <stdint-gcc.h> + +void convert (_Float16 *restrict dst, int64_t *restrict a, int n) +{ + for (int i = 0; i < n; i++) + dst[i] = (_Float16) (a[i] & 0x7fffffff); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vec-widen-float16-int64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vec-widen-float16-int64.c new file mode 100644 index 00000000000..eb0a17e99bc --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vec-widen-float16-int64.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=scalable -fno-trapping-math" } */ + +#include <stdint-gcc.h> + +void convert (int64_t *restrict dst, _Float16 *restrict a, int n) +{ + for (int i = 0; i < n; i++) + dst[i] = (int64_t) a[i]; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index cb86d544313..51173ecf145 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -5192,31 +5192,66 @@ vectorizable_conversion (vec_info *vinfo, break; } - /* For conversions between float and smaller integer types try whether we - can use intermediate signed integer types to support the + /* For conversions between float and integer types try whether + we can use intermediate signed integer types to support the conversion. */ - if ((code == FLOAT_EXPR - && GET_MODE_SIZE (lhs_mode) > GET_MODE_SIZE (rhs_mode)) - || (code == FIX_TRUNC_EXPR - && GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode) - && !flag_trapping_math)) + if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode) + && (code == FLOAT_EXPR || + (code == FIX_TRUNC_EXPR && !flag_trapping_math))) { + bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode); bool float_expr_p = code == FLOAT_EXPR; - scalar_mode imode = float_expr_p ? rhs_mode : lhs_mode; - fltsz = GET_MODE_SIZE (float_expr_p ? lhs_mode : rhs_mode); + unsigned short target_size; + scalar_mode intermediate_mode; + if (demotion) + { + intermediate_mode = lhs_mode; + target_size = GET_MODE_SIZE (rhs_mode); + } + else + { + target_size = GET_MODE_SIZE (lhs_mode); + if (!int_mode_for_size + (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode)) + goto unsupported; + } code1 = float_expr_p ? code : NOP_EXPR; codecvt1 = float_expr_p ? NOP_EXPR : code; - FOR_EACH_2XWIDER_MODE (rhs_mode_iter, imode) + opt_scalar_mode mode_iter; + FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode) { - imode = rhs_mode_iter.require (); - if (GET_MODE_SIZE (imode) > fltsz) + intermediate_mode = mode_iter.require (); + + if (GET_MODE_SIZE (intermediate_mode) > target_size) break; - cvt_type - = build_nonstandard_integer_type (GET_MODE_BITSIZE (imode), - 0); - cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, - slp_node); + scalar_mode cvt_mode; + if (!int_mode_for_size + (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode)) + break; + + cvt_type = build_nonstandard_integer_type + (GET_MODE_BITSIZE (cvt_mode), 0); + + /* Check if the intermediate type can hold OP0's range. + When converting from float to integer this is not necessary + because values that do not fit the (smaller) target type are + unspecified anyway. */ + if (demotion && float_expr_p) + { + wide_int op_min_value, op_max_value; + if (!vect_get_range_info (op0, &op_min_value, &op_max_value)) + break; + + if (cvt_type == NULL_TREE + || (wi::min_precision (op_max_value, SIGNED) + > TYPE_PRECISION (cvt_type)) + || (wi::min_precision (op_min_value, SIGNED) + > TYPE_PRECISION (cvt_type))) + continue; + } + + cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node); /* This should only happened for SLP as long as loop vectorizer only supports same-sized vector. */ if (cvt_type == NULL_TREE -- 2.41.0