tlively created this revision. tlively added a reviewer: aheejin. Herald added subscribers: llvm-commits, cfe-commits, sunfish, hiraditya, jgravelle-google, sbc100, dschuff. Herald added projects: clang, LLVM.
The vector pattern `(a + b + 1) / 2` was previously selected to an avgr_u instruction regardless of nuw flags, but this is incorrect when either addition may have an unsigned wrap: for example, with i8 lanes a = 255 and b = 1, the wrapping adds compute ((255 + 1 + 1) mod 256) / 2 = 0, whereas avgr_u produces 128. This CL changes the existing pattern to require both adds to have nuw flags, and adds builtin functions and intrinsics for the avgr_u instructions because the corrected pattern is not representable in C. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D71648 Files: clang/include/clang/Basic/BuiltinsWebAssembly.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-wasm.c llvm/include/llvm/IR/IntrinsicsWebAssembly.td llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td llvm/test/CodeGen/WebAssembly/simd-arith.ll llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
Index: llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -65,6 +65,16 @@ ret <16 x i8> %a } +; CHECK-LABEL: avgr_u_v16i8: +; SIMD128-NEXT: .functype avgr_u_v16i8 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: i8x16.avgr_u $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <16 x i8> @llvm.wasm.avgr.unsigned.v16i8(<16 x i8>, <16 x i8>) +define <16 x i8> @avgr_u_v16i8(<16 x i8> %x, <16 x i8> %y) { + %a = call <16 x i8> @llvm.wasm.avgr.unsigned.v16i8(<16 x i8> %x, <16 x i8> %y) + ret <16 x i8> %a +} + ; CHECK-LABEL: any_v16i8: ; SIMD128-NEXT: .functype any_v16i8 (v128) -> (i32){{$}} ; SIMD128-NEXT: i8x16.any_true $push[[R:[0-9]+]]=, $0{{$}} @@ -168,6 +178,16 @@ ret <8 x i16> %a } +; CHECK-LABEL: avgr_u_v8i16: +; SIMD128-NEXT: .functype avgr_u_v8i16 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: i16x8.avgr_u $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <8 x i16> @llvm.wasm.avgr.unsigned.v8i16(<8 x i16>, <8 x i16>) +define <8 x i16> @avgr_u_v8i16(<8 x i16> %x, <8 x i16> %y) { + %a = call <8 x i16> @llvm.wasm.avgr.unsigned.v8i16(<8 x i16> %x, <8 x i16> %y) + ret <8 x i16> %a +} + ; CHECK-LABEL: any_v8i16: ; SIMD128-NEXT: .functype any_v8i16 (v128) -> (i32){{$}} ; SIMD128-NEXT: i16x8.any_true $push[[R:[0-9]+]]=, $0{{$}} Index: llvm/test/CodeGen/WebAssembly/simd-arith.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/simd-arith.ll +++ llvm/test/CodeGen/WebAssembly/simd-arith.ll @@ -97,6 +97,19 @@ ; SIMD128-NEXT: i8x16.avgr_u $push[[R:[0-9]+]]=, $0, $1{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @avgr_u_v16i8(<16 x i8> %x, <16 x i8> %y) { + %a = add nuw <16 x i8> %x, %y + %b = add nuw <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, 
i8 1, i8 1, i8 1, i8 1> + %c = udiv <16 x i8> %b, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, + i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2> + ret <16 x i8> %c +} + +; CHECK-LABEL: avgr_u_v16i8_wrap: +; NO-SIMD128-NOT: i8x16 +; SIMD128-NEXT: .functype avgr_u_v16i8_wrap (v128, v128) -> (v128){{$}} +; SIMD128-NOT: i8x16.avgr_u +define <16 x i8> @avgr_u_v16i8_wrap(<16 x i8> %x, <16 x i8> %y) { %a = add <16 x i8> %x, %y %b = add <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> @@ -401,6 +414,17 @@ ; SIMD128-NEXT: i16x8.avgr_u $push[[R:[0-9]+]]=, $0, $1{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @avgr_u_v8i16(<8 x i16> %x, <8 x i16> %y) { + %a = add nuw <8 x i16> %x, %y + %b = add nuw <8 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %c = udiv <8 x i16> %b, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> + ret <8 x i16> %c +} + +; CHECK-LABEL: avgr_u_v8i16_wrap: +; NO-SIMD128-NOT: i16x8 +; SIMD128-NEXT: .functype avgr_u_v8i16_wrap (v128, v128) -> (v128){{$}} +; SIMD128-NOT: i16x8.avgr_u +define <8 x i16> @avgr_u_v8i16_wrap(<8 x i16> %x, <8 x i16> %y) { %a = add <8 x i16> %x, %y %b = add <8 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> %c = udiv <8 x i16> %b, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td =================================================================== --- llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -739,23 +739,24 @@ } // isCommutable = 1 // Integer unsigned rounding average: avgr_u -def avgr_u_v16i8 : - PatFrag<(ops node:$lhs, node:$rhs), - (srl - (add (add node:$lhs, node:$rhs), (splat16 (i32 1))), - (v16i8 (splat16 (i32 1))) - )>; -def avgr_u_v8i16 : - PatFrag<(ops node:$lhs, node:$rhs), - (srl - (add (add node:$lhs, node:$rhs), (splat8 (i32 1))), - (v8i16 (splat8 (i32 1))) - 
)>; - let isCommutable = 1, Predicates = [HasUnimplementedSIMD128] in { -defm AVGR_U : SIMDBinary<v16i8, "i8x16", avgr_u_v16i8, "avgr_u", 217>; -defm AVGR_U : SIMDBinary<v8i16, "i16x8", avgr_u_v8i16, "avgr_u", 218>; -} +defm AVGR_U : SIMDBinary<v16i8, "i8x16", int_wasm_avgr_unsigned, "avgr_u", 217>; +defm AVGR_U : SIMDBinary<v8i16, "i16x8", int_wasm_avgr_unsigned, "avgr_u", 218>; +} + +def add_nuw : PatFrag<(ops node:$lhs, node:$rhs), + (add node:$lhs, node:$rhs), + "return N->getFlags().hasNoUnsignedWrap();">; + +foreach nodes = [[v16i8, splat16], [v8i16, splat8]] in +def : Pat<(srl + (add_nuw + (add_nuw (nodes[0] V128:$lhs), (nodes[0] V128:$rhs)), + (nodes[1] (i32 1)) + ), + (nodes[0] (nodes[1] (i32 1))) + ), + (!cast<NI>("AVGR_U_"#nodes[0]) V128:$lhs, V128:$rhs)>; // Widening dot product: i32x4.dot_i16x8_s let isCommutable = 1 in Index: llvm/include/llvm/IR/IntrinsicsWebAssembly.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -112,6 +112,10 @@ Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>; +def int_wasm_avgr_unsigned : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_bitselect : Intrinsic<[llvm_anyvector_ty], Index: clang/test/CodeGen/builtins-wasm.c =================================================================== --- clang/test/CodeGen/builtins-wasm.c +++ clang/test/CodeGen/builtins-wasm.c @@ -352,6 +352,20 @@ // WEBASSEMBLY-NEXT: ret } +i8x16 avgr_u_i8x16(i8x16 x, i8x16 y) { + return __builtin_wasm_avgr_u_i8x16(x, y); + // WEBASSEMBLY: call <16 x i8> @llvm.wasm.avgr.unsigned.v16i8( + // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y) + // WEBASSEMBLY-NEXT: ret +} + +i16x8 avgr_u_i16x8(i16x8 x, i16x8 y) { + return __builtin_wasm_avgr_u_i16x8(x, y); + // WEBASSEMBLY: call <8 x i16> @llvm.wasm.avgr.unsigned.v8i16( + 
// WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y) + // WEBASSEMBLY-NEXT: ret +} + i32x4 dot_i16x8_s(i16x8 x, i16x8 y) { return __builtin_wasm_dot_s_i32x4_i16x8(x, y); // WEBASSEMBLY: call <4 x i32> @llvm.wasm.dot(<8 x i16> %x, <8 x i16> %y) Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -14496,6 +14496,14 @@ Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } + case WebAssembly::BI__builtin_wasm_avgr_u_i8x16: + case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned, + ConvertType(E->getType())); + return Builder.CreateCall(Callee, {LHS, RHS}); + } case WebAssembly::BI__builtin_wasm_bitselect: { Value *V1 = EmitScalarExpr(E->getArg(0)); Value *V2 = EmitScalarExpr(E->getArg(1)); Index: clang/include/clang/Basic/BuiltinsWebAssembly.def =================================================================== --- clang/include/clang/Basic/BuiltinsWebAssembly.def +++ clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -98,6 +98,9 @@ TARGET_BUILTIN(__builtin_wasm_sub_saturate_s_i16x8, "V8sV8sV8s", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_sub_saturate_u_i16x8, "V8sV8sV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_avgr_u_i8x16, "V16cV16cV16c", "nc", "unimplemented-simd128") +TARGET_BUILTIN(__builtin_wasm_avgr_u_i16x8, "V8sV8sV8s", "nc", "unimplemented-simd128") + TARGET_BUILTIN(__builtin_wasm_bitselect, "V4iV4iV4iV4i", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_any_true_i8x16, "iV16c", "nc", "simd128")
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits