[PATCH] D73182: [CodeGen] Emit IR for fixed-point multiplication and division.
This revision was automatically updated to reflect the committed changes. Closed by commit rG0b9922e67a0b: [CodeGen] Emit IR for fixed-point multiplication and division. (authored by ebevhan). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D73182/new/ https://reviews.llvm.org/D73182 Files: clang/lib/CodeGen/CGExprScalar.cpp clang/test/Frontend/fixed_point_div.c clang/test/Frontend/fixed_point_mul.c Index: clang/test/Frontend/fixed_point_mul.c === --- /dev/null +++ clang/test/Frontend/fixed_point_mul.c @@ -0,0 +1,431 @@ +// RUN: %clang_cc1 -ffixed-point -triple x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,SIGNED +// RUN: %clang_cc1 -ffixed-point -triple x86_64-unknown-linux-gnu -fpadding-on-unsigned-fixed-point -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,UNSIGNED + +void SignedMultiplication() { + // CHECK-LABEL: SignedMultiplication + short _Accum sa; + _Accum a, b, c, d; + long _Accum la; + unsigned short _Accum usa; + unsigned _Accum ua; + unsigned long _Accum ula; + + short _Fract sf; + _Fract f; + long _Fract lf; + unsigned short _Fract usf; + unsigned _Fract uf; + unsigned long _Fract ulf; + + // Same type + // CHECK: [[TMP0:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[TMP0]], i16 [[TMP1]], i32 7) + // CHECK-NEXT: store i16 [[TMP2]], i16* %sa, align 2 + sa = sa * sa; + + // To larger scale and larger width + // CHECK: [[TMP3:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* %a, align 4 + // CHECK-NEXT: [[RESIZE:%.*]] = sext i16 [[TMP3]] to i32 + // CHECK-NEXT: [[UPSCALE:%.*]] = shl i32 [[RESIZE]], 8 + // CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[UPSCALE]], i32 [[TMP4]], i32 15) + // CHECK-NEXT: store i32 [[TMP5]], i32* %a, align 4 + a = sa * a; + + // To same scale and smaller width + // CHECK: 
[[TMP6:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* %sf, align 1 + // CHECK-NEXT: [[RESIZE1:%.*]] = sext i8 [[TMP7]] to i16 + // CHECK-NEXT: [[TMP8:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[TMP6]], i16 [[RESIZE1]], i32 7) + // CHECK-NEXT: store i16 [[TMP8]], i16* %sa, align 2 + sa = sa * sf; + + // To smaller scale and same width. + // CHECK: [[TMP9:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP10:%.*]] = load i16, i16* %f, align 2 + // CHECK-NEXT: [[RESIZE2:%.*]] = sext i16 [[TMP9]] to i24 + // CHECK-NEXT: [[UPSCALE3:%.*]] = shl i24 [[RESIZE2]], 8 + // CHECK-NEXT: [[RESIZE4:%.*]] = sext i16 [[TMP10]] to i24 + // CHECK-NEXT: [[TMP11:%.*]] = call i24 @llvm.smul.fix.i24(i24 [[UPSCALE3]], i24 [[RESIZE4]], i32 15) + // CHECK-NEXT: [[DOWNSCALE:%.*]] = ashr i24 [[TMP11]], 8 + // CHECK-NEXT: [[RESIZE5:%.*]] = trunc i24 [[DOWNSCALE]] to i16 + // CHECK-NEXT: store i16 [[RESIZE5]], i16* %sa, align 2 + sa = sa * f; + + // To smaller scale and smaller width + // CHECK: [[TMP12:%.*]] = load i32, i32* %a, align 4 + // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* %sf, align 1 + // CHECK-NEXT: [[RESIZE6:%.*]] = sext i8 [[TMP13]] to i32 + // CHECK-NEXT: [[UPSCALE7:%.*]] = shl i32 [[RESIZE6]], 8 + // CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[TMP12]], i32 [[UPSCALE7]], i32 15) + // CHECK-NEXT: store i32 [[TMP14]], i32* %a, align 4 + a = a * sf; + + // To larger scale and same width + // CHECK: [[TMP15:%.*]] = load i32, i32* %a, align 4 + // CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* %lf, align 4 + // CHECK-NEXT: [[RESIZE8:%.*]] = sext i32 [[TMP15]] to i48 + // CHECK-NEXT: [[UPSCALE9:%.*]] = shl i48 [[RESIZE8]], 16 + // CHECK-NEXT: [[RESIZE10:%.*]] = sext i32 [[TMP16]] to i48 + // CHECK-NEXT: [[TMP17:%.*]] = call i48 @llvm.smul.fix.i48(i48 [[UPSCALE9]], i48 [[RESIZE10]], i32 31) + // CHECK-NEXT: [[DOWNSCALE11:%.*]] = ashr i48 [[TMP17]], 16 + // CHECK-NEXT: [[RESIZE12:%.*]] = trunc i48 [[DOWNSCALE11]] to i32 + // 
CHECK-NEXT: store i32 [[RESIZE12]], i32* %a, align 4 + a = a * lf; + + // With corresponding unsigned type + // CHECK:[[TMP18:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* %usa, align 2 + // SIGNED-NEXT: [[RESIZE13:%.*]] = sext i16 [[TMP18]] to i17 + // SIGNED-NEXT: [[UPSCALE14:%.*]] = shl i17 [[RESIZE13]], 1 + // SIGNED-NEXT: [[RESIZE15:%.*]] = zext i16 [[TMP19]] to i17 + // SIGNED-NEXT: [[TMP20:%.*]] = call i17 @llvm.smul.fix.i17(i17 [[UPSCALE14]], i17 [[RESIZE15]], i32 8) + // SIGNED-NEXT: [[DOWNSCALE16:%.*]] = ashr i17 [[TMP20]], 1 + // SIGNED-NEXT: [[RESIZE17:%.*]] = trunc i17 [[DOWNSCALE16]] to i16 + // SIGNED-NEXT: store i16 [[RESIZE17]], i16* %sa, align 2 + // UNSIGNED-NEXT:[[TMP20:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[TMP18]], i16 [[TMP19]], i32 7) + // UNSIGNED-NEXT:store i16 [[TMP20]], i16* %sa, align 2 + sa = sa * usa;
[PATCH] D73182: [CodeGen] Emit IR for fixed-point multiplication and division.
ebevhan updated this revision to Diff 255970. ebevhan added a comment. Rebased. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D73182/new/ https://reviews.llvm.org/D73182 Files: clang/lib/CodeGen/CGExprScalar.cpp clang/test/Frontend/fixed_point_div.c clang/test/Frontend/fixed_point_mul.c Index: clang/test/Frontend/fixed_point_mul.c === --- /dev/null +++ clang/test/Frontend/fixed_point_mul.c @@ -0,0 +1,431 @@ +// RUN: %clang_cc1 -ffixed-point -triple x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,SIGNED +// RUN: %clang_cc1 -ffixed-point -triple x86_64-unknown-linux-gnu -fpadding-on-unsigned-fixed-point -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,UNSIGNED + +void SignedMultiplication() { + // CHECK-LABEL: SignedMultiplication + short _Accum sa; + _Accum a, b, c, d; + long _Accum la; + unsigned short _Accum usa; + unsigned _Accum ua; + unsigned long _Accum ula; + + short _Fract sf; + _Fract f; + long _Fract lf; + unsigned short _Fract usf; + unsigned _Fract uf; + unsigned long _Fract ulf; + + // Same type + // CHECK: [[TMP0:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[TMP0]], i16 [[TMP1]], i32 7) + // CHECK-NEXT: store i16 [[TMP2]], i16* %sa, align 2 + sa = sa * sa; + + // To larger scale and larger width + // CHECK: [[TMP3:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* %a, align 4 + // CHECK-NEXT: [[RESIZE:%.*]] = sext i16 [[TMP3]] to i32 + // CHECK-NEXT: [[UPSCALE:%.*]] = shl i32 [[RESIZE]], 8 + // CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[UPSCALE]], i32 [[TMP4]], i32 15) + // CHECK-NEXT: store i32 [[TMP5]], i32* %a, align 4 + a = sa * a; + + // To same scale and smaller width + // CHECK: [[TMP6:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* %sf, align 1 + // CHECK-NEXT: 
[[RESIZE1:%.*]] = sext i8 [[TMP7]] to i16 + // CHECK-NEXT: [[TMP8:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[TMP6]], i16 [[RESIZE1]], i32 7) + // CHECK-NEXT: store i16 [[TMP8]], i16* %sa, align 2 + sa = sa * sf; + + // To smaller scale and same width. + // CHECK: [[TMP9:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP10:%.*]] = load i16, i16* %f, align 2 + // CHECK-NEXT: [[RESIZE2:%.*]] = sext i16 [[TMP9]] to i24 + // CHECK-NEXT: [[UPSCALE3:%.*]] = shl i24 [[RESIZE2]], 8 + // CHECK-NEXT: [[RESIZE4:%.*]] = sext i16 [[TMP10]] to i24 + // CHECK-NEXT: [[TMP11:%.*]] = call i24 @llvm.smul.fix.i24(i24 [[UPSCALE3]], i24 [[RESIZE4]], i32 15) + // CHECK-NEXT: [[DOWNSCALE:%.*]] = ashr i24 [[TMP11]], 8 + // CHECK-NEXT: [[RESIZE5:%.*]] = trunc i24 [[DOWNSCALE]] to i16 + // CHECK-NEXT: store i16 [[RESIZE5]], i16* %sa, align 2 + sa = sa * f; + + // To smaller scale and smaller width + // CHECK: [[TMP12:%.*]] = load i32, i32* %a, align 4 + // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* %sf, align 1 + // CHECK-NEXT: [[RESIZE6:%.*]] = sext i8 [[TMP13]] to i32 + // CHECK-NEXT: [[UPSCALE7:%.*]] = shl i32 [[RESIZE6]], 8 + // CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[TMP12]], i32 [[UPSCALE7]], i32 15) + // CHECK-NEXT: store i32 [[TMP14]], i32* %a, align 4 + a = a * sf; + + // To larger scale and same width + // CHECK: [[TMP15:%.*]] = load i32, i32* %a, align 4 + // CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* %lf, align 4 + // CHECK-NEXT: [[RESIZE8:%.*]] = sext i32 [[TMP15]] to i48 + // CHECK-NEXT: [[UPSCALE9:%.*]] = shl i48 [[RESIZE8]], 16 + // CHECK-NEXT: [[RESIZE10:%.*]] = sext i32 [[TMP16]] to i48 + // CHECK-NEXT: [[TMP17:%.*]] = call i48 @llvm.smul.fix.i48(i48 [[UPSCALE9]], i48 [[RESIZE10]], i32 31) + // CHECK-NEXT: [[DOWNSCALE11:%.*]] = ashr i48 [[TMP17]], 16 + // CHECK-NEXT: [[RESIZE12:%.*]] = trunc i48 [[DOWNSCALE11]] to i32 + // CHECK-NEXT: store i32 [[RESIZE12]], i32* %a, align 4 + a = a * lf; + + // With corresponding unsigned type + // 
CHECK:[[TMP18:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* %usa, align 2 + // SIGNED-NEXT: [[RESIZE13:%.*]] = sext i16 [[TMP18]] to i17 + // SIGNED-NEXT: [[UPSCALE14:%.*]] = shl i17 [[RESIZE13]], 1 + // SIGNED-NEXT: [[RESIZE15:%.*]] = zext i16 [[TMP19]] to i17 + // SIGNED-NEXT: [[TMP20:%.*]] = call i17 @llvm.smul.fix.i17(i17 [[UPSCALE14]], i17 [[RESIZE15]], i32 8) + // SIGNED-NEXT: [[DOWNSCALE16:%.*]] = ashr i17 [[TMP20]], 1 + // SIGNED-NEXT: [[RESIZE17:%.*]] = trunc i17 [[DOWNSCALE16]] to i16 + // SIGNED-NEXT: store i16 [[RESIZE17]], i16* %sa, align 2 + // UNSIGNED-NEXT:[[TMP20:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[TMP18]], i16 [[TMP19]], i32 7) + // UNSIGNED-NEXT:store i16 [[TMP20]], i16* %sa, align 2 + sa = sa * usa;
[PATCH] D73182: [CodeGen] Emit IR for fixed-point multiplication and division.
ebevhan updated this revision to Diff 239793. ebevhan added a comment. Rebased. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D73182/new/ https://reviews.llvm.org/D73182 Files: clang/lib/CodeGen/CGExprScalar.cpp clang/test/Frontend/fixed_point_div.c clang/test/Frontend/fixed_point_mul.c Index: clang/test/Frontend/fixed_point_mul.c === --- /dev/null +++ clang/test/Frontend/fixed_point_mul.c @@ -0,0 +1,431 @@ +// RUN: %clang_cc1 -ffixed-point -triple x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,SIGNED +// RUN: %clang_cc1 -ffixed-point -triple x86_64-unknown-linux-gnu -fpadding-on-unsigned-fixed-point -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,UNSIGNED + +void SignedMultiplication() { + // CHECK-LABEL: SignedMultiplication + short _Accum sa; + _Accum a, b, c, d; + long _Accum la; + unsigned short _Accum usa; + unsigned _Accum ua; + unsigned long _Accum ula; + + short _Fract sf; + _Fract f; + long _Fract lf; + unsigned short _Fract usf; + unsigned _Fract uf; + unsigned long _Fract ulf; + + // Same type + // CHECK: [[TMP0:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[TMP0]], i16 [[TMP1]], i32 7) + // CHECK-NEXT: store i16 [[TMP2]], i16* %sa, align 2 + sa = sa * sa; + + // To larger scale and larger width + // CHECK: [[TMP3:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* %a, align 4 + // CHECK-NEXT: [[RESIZE:%.*]] = sext i16 [[TMP3]] to i32 + // CHECK-NEXT: [[UPSCALE:%.*]] = shl i32 [[RESIZE]], 8 + // CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[UPSCALE]], i32 [[TMP4]], i32 15) + // CHECK-NEXT: store i32 [[TMP5]], i32* %a, align 4 + a = sa * a; + + // To same scale and smaller width + // CHECK: [[TMP6:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* %sf, align 1 + // CHECK-NEXT: 
[[RESIZE1:%.*]] = sext i8 [[TMP7]] to i16 + // CHECK-NEXT: [[TMP8:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[TMP6]], i16 [[RESIZE1]], i32 7) + // CHECK-NEXT: store i16 [[TMP8]], i16* %sa, align 2 + sa = sa * sf; + + // To smaller scale and same width. + // CHECK: [[TMP9:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP10:%.*]] = load i16, i16* %f, align 2 + // CHECK-NEXT: [[RESIZE2:%.*]] = sext i16 [[TMP9]] to i24 + // CHECK-NEXT: [[UPSCALE3:%.*]] = shl i24 [[RESIZE2]], 8 + // CHECK-NEXT: [[RESIZE4:%.*]] = sext i16 [[TMP10]] to i24 + // CHECK-NEXT: [[TMP11:%.*]] = call i24 @llvm.smul.fix.i24(i24 [[UPSCALE3]], i24 [[RESIZE4]], i32 15) + // CHECK-NEXT: [[DOWNSCALE:%.*]] = ashr i24 [[TMP11]], 8 + // CHECK-NEXT: [[RESIZE5:%.*]] = trunc i24 [[DOWNSCALE]] to i16 + // CHECK-NEXT: store i16 [[RESIZE5]], i16* %sa, align 2 + sa = sa * f; + + // To smaller scale and smaller width + // CHECK: [[TMP12:%.*]] = load i32, i32* %a, align 4 + // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* %sf, align 1 + // CHECK-NEXT: [[RESIZE6:%.*]] = sext i8 [[TMP13]] to i32 + // CHECK-NEXT: [[UPSCALE7:%.*]] = shl i32 [[RESIZE6]], 8 + // CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[TMP12]], i32 [[UPSCALE7]], i32 15) + // CHECK-NEXT: store i32 [[TMP14]], i32* %a, align 4 + a = a * sf; + + // To larger scale and same width + // CHECK: [[TMP15:%.*]] = load i32, i32* %a, align 4 + // CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* %lf, align 4 + // CHECK-NEXT: [[RESIZE8:%.*]] = sext i32 [[TMP15]] to i48 + // CHECK-NEXT: [[UPSCALE9:%.*]] = shl i48 [[RESIZE8]], 16 + // CHECK-NEXT: [[RESIZE10:%.*]] = sext i32 [[TMP16]] to i48 + // CHECK-NEXT: [[TMP17:%.*]] = call i48 @llvm.smul.fix.i48(i48 [[UPSCALE9]], i48 [[RESIZE10]], i32 31) + // CHECK-NEXT: [[DOWNSCALE11:%.*]] = ashr i48 [[TMP17]], 16 + // CHECK-NEXT: [[RESIZE12:%.*]] = trunc i48 [[DOWNSCALE11]] to i32 + // CHECK-NEXT: store i32 [[RESIZE12]], i32* %a, align 4 + a = a * lf; + + // With corresponding unsigned type + // 
CHECK:[[TMP18:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* %usa, align 2 + // SIGNED-NEXT: [[RESIZE13:%.*]] = sext i16 [[TMP18]] to i17 + // SIGNED-NEXT: [[UPSCALE14:%.*]] = shl i17 [[RESIZE13]], 1 + // SIGNED-NEXT: [[RESIZE15:%.*]] = zext i16 [[TMP19]] to i17 + // SIGNED-NEXT: [[TMP20:%.*]] = call i17 @llvm.smul.fix.i17(i17 [[UPSCALE14]], i17 [[RESIZE15]], i32 8) + // SIGNED-NEXT: [[DOWNSCALE16:%.*]] = ashr i17 [[TMP20]], 1 + // SIGNED-NEXT: [[RESIZE17:%.*]] = trunc i17 [[DOWNSCALE16]] to i16 + // SIGNED-NEXT: store i16 [[RESIZE17]], i16* %sa, align 2 + // UNSIGNED-NEXT:[[TMP20:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[TMP18]], i16 [[TMP19]], i32 7) + // UNSIGNED-NEXT:store i16 [[TMP20]], i16* %sa, align 2 + sa = sa * usa;
[PATCH] D73182: [CodeGen] Emit IR for fixed-point multiplication and division.
ebevhan created this revision. ebevhan added reviewers: rjmccall, leonardchan. Herald added a project: clang. Herald added a subscriber: cfe-commits. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D73182 Files: clang/lib/CodeGen/CGExprScalar.cpp clang/test/Frontend/fixed_point_div.c clang/test/Frontend/fixed_point_mul.c Index: clang/test/Frontend/fixed_point_mul.c === --- /dev/null +++ clang/test/Frontend/fixed_point_mul.c @@ -0,0 +1,431 @@ +// RUN: %clang_cc1 -ffixed-point -triple x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,SIGNED +// RUN: %clang_cc1 -ffixed-point -triple x86_64-unknown-linux-gnu -fpadding-on-unsigned-fixed-point -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,UNSIGNED + +void SignedMultiplication() { + // CHECK-LABEL: SignedMultiplication + short _Accum sa; + _Accum a, b, c, d; + long _Accum la; + unsigned short _Accum usa; + unsigned _Accum ua; + unsigned long _Accum ula; + + short _Fract sf; + _Fract f; + long _Fract lf; + unsigned short _Fract usf; + unsigned _Fract uf; + unsigned long _Fract ulf; + + // Same type + // CHECK: [[TMP0:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[TMP0]], i16 [[TMP1]], i32 7) + // CHECK-NEXT: store i16 [[TMP2]], i16* %sa, align 2 + sa = sa * sa; + + // To larger scale and larger width + // CHECK: [[TMP3:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* %a, align 4 + // CHECK-NEXT: [[RESIZE:%.*]] = sext i16 [[TMP3]] to i32 + // CHECK-NEXT: [[UPSCALE:%.*]] = shl i32 [[RESIZE]], 8 + // CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[UPSCALE]], i32 [[TMP4]], i32 15) + // CHECK-NEXT: store i32 [[TMP5]], i32* %a, align 4 + a = sa * a; + + // To same scale and smaller width + // CHECK: [[TMP6:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* %sf, align 1 + // 
CHECK-NEXT: [[RESIZE1:%.*]] = sext i8 [[TMP7]] to i16 + // CHECK-NEXT: [[TMP8:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[TMP6]], i16 [[RESIZE1]], i32 7) + // CHECK-NEXT: store i16 [[TMP8]], i16* %sa, align 2 + sa = sa * sf; + + // To smaller scale and same width. + // CHECK: [[TMP9:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP10:%.*]] = load i16, i16* %f, align 2 + // CHECK-NEXT: [[RESIZE2:%.*]] = sext i16 [[TMP9]] to i24 + // CHECK-NEXT: [[UPSCALE3:%.*]] = shl i24 [[RESIZE2]], 8 + // CHECK-NEXT: [[RESIZE4:%.*]] = sext i16 [[TMP10]] to i24 + // CHECK-NEXT: [[TMP11:%.*]] = call i24 @llvm.smul.fix.i24(i24 [[UPSCALE3]], i24 [[RESIZE4]], i32 15) + // CHECK-NEXT: [[DOWNSCALE:%.*]] = ashr i24 [[TMP11]], 8 + // CHECK-NEXT: [[RESIZE5:%.*]] = trunc i24 [[DOWNSCALE]] to i16 + // CHECK-NEXT: store i16 [[RESIZE5]], i16* %sa, align 2 + sa = sa * f; + + // To smaller scale and smaller width + // CHECK: [[TMP12:%.*]] = load i32, i32* %a, align 4 + // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* %sf, align 1 + // CHECK-NEXT: [[RESIZE6:%.*]] = sext i8 [[TMP13]] to i32 + // CHECK-NEXT: [[UPSCALE7:%.*]] = shl i32 [[RESIZE6]], 8 + // CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[TMP12]], i32 [[UPSCALE7]], i32 15) + // CHECK-NEXT: store i32 [[TMP14]], i32* %a, align 4 + a = a * sf; + + // To larger scale and same width + // CHECK: [[TMP15:%.*]] = load i32, i32* %a, align 4 + // CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* %lf, align 4 + // CHECK-NEXT: [[RESIZE8:%.*]] = sext i32 [[TMP15]] to i48 + // CHECK-NEXT: [[UPSCALE9:%.*]] = shl i48 [[RESIZE8]], 16 + // CHECK-NEXT: [[RESIZE10:%.*]] = sext i32 [[TMP16]] to i48 + // CHECK-NEXT: [[TMP17:%.*]] = call i48 @llvm.smul.fix.i48(i48 [[UPSCALE9]], i48 [[RESIZE10]], i32 31) + // CHECK-NEXT: [[DOWNSCALE11:%.*]] = ashr i48 [[TMP17]], 16 + // CHECK-NEXT: [[RESIZE12:%.*]] = trunc i48 [[DOWNSCALE11]] to i32 + // CHECK-NEXT: store i32 [[RESIZE12]], i32* %a, align 4 + a = a * lf; + + // With corresponding unsigned type + // 
CHECK:[[TMP18:%.*]] = load i16, i16* %sa, align 2 + // CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* %usa, align 2 + // SIGNED-NEXT: [[RESIZE13:%.*]] = sext i16 [[TMP18]] to i17 + // SIGNED-NEXT: [[UPSCALE14:%.*]] = shl i17 [[RESIZE13]], 1 + // SIGNED-NEXT: [[RESIZE15:%.*]] = zext i16 [[TMP19]] to i17 + // SIGNED-NEXT: [[TMP20:%.*]] = call i17 @llvm.smul.fix.i17(i17 [[UPSCALE14]], i17 [[RESIZE15]], i32 8) + // SIGNED-NEXT: [[DOWNSCALE16:%.*]] = ashr i17 [[TMP20]], 1 + // SIGNED-NEXT: [[RESIZE17:%.*]] = trunc i17 [[DOWNSCALE16]] to i16 + // SIGNED-NEXT: store i16 [[RESIZE17]], i16* %sa, align 2 + // UNSIGNED-NEXT:[[TMP20:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[TMP18]], i16 [[TMP19]], i32 7) + // UNSIGNED-NEXT:store i16 [[TMP20]], i16* %sa, align 2 + sa = sa * usa;