On Fri, Feb 01, 2013 at 04:05:51PM +0100, Christian König wrote: > Hi guys, > > I needed to rearrange the order of patches in this patchset, cause fixing one > bug lead to the discovery of a couple of other more nasty bugs. > > It now fixes four piglit tests with radeonsi and also survives the use cases > Tom provided without causing any regression. > > So please test again, and apply if it now works correctly. >
Hi Christian, I've discovered another potential problem, this time with the piglit test: ext_timer_query-time-elapsed timestamp What's happening is that the EndCF instructions are being reordered within their basic block during instruction selection, so if there is an LLVM IR block like this: ENDIF39: ; preds = %IF40, %ENDIF36 %temp.2 = phi float [ %77, %IF40 ], [ %58, %ENDIF36 ] call void @llvm.R600.endcf(i1 %68) %78 = bitcast float %temp4.0 to i32 %79 = add i32 %78, 1 %80 = bitcast i32 %79 to float br label %Flow it is being emitted like this: BB#8: derived from LLVM BB %ENDIF39 Predecessors according to CFG: BB#6 BB#7 %vreg14<def> = PHI %vreg11, <BB#6>, %vreg13, <BB#7>; R600_Reg32:%vreg14,%vreg11,%vreg13 %vreg15<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, -1, %ONE_INT, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg15,%vreg0 R600_ENDIF BRANCH <BB#5> I've attached the output of the test I mentioned above. It contains the LLVM IR before and after the CFG structurizer pass, the SelectionDAG debug output for the problem block and then the resulting MachineInstrs. -Tom
define void @main() { main_body: br label %LOOP LOOP: ; preds = %ENDIF39, %main_body %temp4.0 = phi float [ 0.000000e+00, %main_body ], [ %70, %ENDIF39 ] %temp.0 = phi float [ 0.000000e+00, %main_body ], [ %temp.2, %ENDIF39 ] %0 = bitcast float %temp4.0 to i32 %1 = load <4 x float> addrspace(9)* null %2 = extractelement <4 x float> %1, i32 0 %3 = bitcast float %2 to i32 %4 = icmp sge i32 %0, %3 %5 = sext i1 %4 to i32 %6 = bitcast i32 %5 to float %7 = bitcast float %6 to i32 %8 = icmp ne i32 %7, 0 %9 = bitcast float %temp.0 to i32 br i1 %8, label %IF, label %ENDIF IF: ; preds = %LOOP %10 = sitofp i32 %9 to float %11 = load <4 x float> addrspace(9)* null %12 = extractelement <4 x float> %11, i32 0 %13 = bitcast float %12 to i32 %14 = sitofp i32 %13 to float %15 = fdiv float 1.000000e+00, %14 %16 = fmul float %10, %15 %17 = call float @llvm.AMDIL.clamp.(float %16, float 0.000000e+00, float 1.000000e+00) %18 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00) %19 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) %20 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) %21 = insertelement <4 x float> undef, float %17, i32 0 %22 = insertelement <4 x float> %21, float %18, i32 1 %23 = insertelement <4 x float> %22, float %19, i32 2 %24 = insertelement <4 x float> %23, float %20, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %24, i32 0, i32 0) ret void ENDIF: ; preds = %LOOP %25 = bitcast float %temp4.0 to i32 %26 = add i32 %9, %25 %27 = bitcast i32 %26 to float %28 = bitcast float %27 to i32 %29 = load <4 x float> addrspace(9)* null %30 = extractelement <4 x float> %29, i32 0 %31 = bitcast float %30 to i32 %32 = icmp sge i32 %28, %31 %33 = sext i1 %32 to i32 %34 = bitcast i32 %33 to float %35 = bitcast float %34 to i32 %36 = icmp ne i32 %35, 0 br i1 %36, label %IF37, label %ENDIF36 IF37: ; preds = %ENDIF %37 = load <4 x float> addrspace(9)* null 
%38 = extractelement <4 x float> %37, i32 0 %39 = bitcast float %38 to i32 %40 = sub i32 0, %39 %41 = bitcast i32 %40 to float %42 = bitcast float %27 to i32 %43 = bitcast float %41 to i32 %44 = add i32 %42, %43 %45 = bitcast i32 %44 to float br label %ENDIF36 ENDIF36: ; preds = %IF37, %ENDIF %temp.1 = phi float [ %45, %IF37 ], [ %27, %ENDIF ] %46 = bitcast float %temp.1 to i32 %47 = bitcast float %temp4.0 to i32 %48 = add i32 %46, %47 %49 = bitcast i32 %48 to float %50 = bitcast float %49 to i32 %51 = load <4 x float> addrspace(9)* null %52 = extractelement <4 x float> %51, i32 0 %53 = bitcast float %52 to i32 %54 = icmp sge i32 %50, %53 %55 = sext i1 %54 to i32 %56 = bitcast i32 %55 to float %57 = bitcast float %56 to i32 %58 = icmp ne i32 %57, 0 br i1 %58, label %IF40, label %ENDIF39 IF40: ; preds = %ENDIF36 %59 = load <4 x float> addrspace(9)* null %60 = extractelement <4 x float> %59, i32 0 %61 = bitcast float %60 to i32 %62 = sub i32 0, %61 %63 = bitcast i32 %62 to float %64 = bitcast float %49 to i32 %65 = bitcast float %63 to i32 %66 = add i32 %64, %65 %67 = bitcast i32 %66 to float br label %ENDIF39 ENDIF39: ; preds = %IF40, %ENDIF36 %temp.2 = phi float [ %67, %IF40 ], [ %49, %ENDIF36 ] %68 = bitcast float %temp4.0 to i32 %69 = add i32 %68, 1 %70 = bitcast i32 %69 to float br label %LOOP } *** IR Dump Before Preliminary module verification *** define void @main() { main_body: br label %LOOP LOOP: ; preds = %Flow, %main_body %0 = phi i1 [ %53, %Flow ], [ false, %main_body ] %temp4.0 = phi float [ 0.000000e+00, %main_body ], [ %50, %Flow ] %temp.0 = phi float [ 0.000000e+00, %main_body ], [ %51, %Flow ] %1 = bitcast float %temp4.0 to i32 %2 = load <4 x float> addrspace(9)* null %3 = extractelement <4 x float> %2, i32 0 %4 = bitcast float %3 to i32 %5 = icmp sge i32 %1, %4 %6 = sext i1 %5 to i32 %7 = bitcast i32 %6 to float %8 = bitcast float %7 to i32 %9 = icmp ne i32 %8, 0 %10 = bitcast float %temp.0 to i32 %11 = xor i1 %9, true %12 = call i1 
@llvm.R600.if(i1 %11) br i1 %12, label %ENDIF, label %Flow IF: ; preds = %Flow call void @llvm.R600.endcf(i1 %53) %13 = sitofp i32 %10 to float %14 = load <4 x float> addrspace(9)* null %15 = extractelement <4 x float> %14, i32 0 %16 = bitcast float %15 to i32 %17 = sitofp i32 %16 to float %18 = fdiv float 1.000000e+00, %17 %19 = fmul float %13, %18 %20 = call float @llvm.AMDIL.clamp.(float %19, float 0.000000e+00, float 1.000000e+00) %21 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00) %22 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) %23 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) %24 = insertelement <4 x float> undef, float %20, i32 0 %25 = insertelement <4 x float> %24, float %21, i32 1 %26 = insertelement <4 x float> %25, float %22, i32 2 %27 = insertelement <4 x float> %26, float %23, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %27, i32 0, i32 0) ret void ENDIF: ; preds = %LOOP %28 = bitcast float %temp4.0 to i32 %29 = add i32 %10, %28 %30 = bitcast i32 %29 to float %31 = bitcast float %30 to i32 %32 = load <4 x float> addrspace(9)* null %33 = extractelement <4 x float> %32, i32 0 %34 = bitcast float %33 to i32 %35 = icmp sge i32 %31, %34 %36 = sext i1 %35 to i32 %37 = bitcast i32 %36 to float %38 = bitcast float %37 to i32 %39 = icmp ne i32 %38, 0 %40 = call i1 @llvm.R600.if(i1 %39) br i1 %40, label %IF37, label %ENDIF36 IF37: ; preds = %ENDIF %41 = load <4 x float> addrspace(9)* null %42 = extractelement <4 x float> %41, i32 0 %43 = bitcast float %42 to i32 %44 = sub i32 0, %43 %45 = bitcast i32 %44 to float %46 = bitcast float %30 to i32 %47 = bitcast float %45 to i32 %48 = add i32 %46, %47 %49 = bitcast i32 %48 to float br label %ENDIF36 Flow: ; preds = %ENDIF39, %LOOP %50 = phi float [ %80, %ENDIF39 ], [ undef, %LOOP ] %51 = phi float [ %temp.2, %ENDIF39 ], [ undef, %LOOP ] %52 = xor i1 %12, true %53 = call i1 
@llvm.R600.ifbreak(i1 %52) call void @llvm.R600.endcf(i1 %12) %54 = call i1 @llvm.R600.loop(i1 %53) br i1 %54, label %IF, label %LOOP ENDIF36: ; preds = %IF37, %ENDIF %temp.1 = phi float [ %49, %IF37 ], [ %30, %ENDIF ] call void @llvm.R600.endcf(i1 %40) %55 = bitcast float %temp.1 to i32 %56 = bitcast float %temp4.0 to i32 %57 = add i32 %55, %56 %58 = bitcast i32 %57 to float %59 = bitcast float %58 to i32 %60 = load <4 x float> addrspace(9)* null %61 = extractelement <4 x float> %60, i32 0 %62 = bitcast float %61 to i32 %63 = icmp sge i32 %59, %62 %64 = sext i1 %63 to i32 %65 = bitcast i32 %64 to float %66 = bitcast float %65 to i32 %67 = icmp ne i32 %66, 0 %68 = call i1 @llvm.R600.if(i1 %67) br i1 %68, label %IF40, label %ENDIF39 IF40: ; preds = %ENDIF36 %69 = load <4 x float> addrspace(9)* null %70 = extractelement <4 x float> %69, i32 0 %71 = bitcast float %70 to i32 %72 = sub i32 0, %71 %73 = bitcast i32 %72 to float %74 = bitcast float %58 to i32 %75 = bitcast float %73 to i32 %76 = add i32 %74, %75 %77 = bitcast i32 %76 to float br label %ENDIF39 ENDIF39: ; preds = %IF40, %ENDIF36 %temp.2 = phi float [ %77, %IF40 ], [ %58, %ENDIF36 ] call void @llvm.R600.endcf(i1 %68) %78 = bitcast float %temp4.0 to i32 %79 = add i32 %78, 1 %80 = bitcast i32 %79 to float br label %Flow } Total amount of phi nodes to update: 1 Node 0 : (0x2646428, 2147483661) Initial selection DAG: BB#8 'main:ENDIF39' SelectionDAG has 16 nodes: 0x2612150: ch = EntryToken [ORD=59] 0x2612150: <multiple use> 0x2654460: f32 = Register %vreg15 0x2612150: <multiple use> 0x2652c50: f32 = Register %vreg0 [ORD=60] 0x2653450: f32,ch = CopyFromReg 0x2612150, 0x2652c50 [ORD=60] 0x2654360: i32 = bitcast 0x2653450 [ORD=60] 0x2652d50: i32 = Constant<1> [ORD=61] 0x264f6a0: i32 = add 0x2654360, 0x2652d50 [ORD=61] 0x2652950: f32 = bitcast 0x264f6a0 [ORD=62] 0x2652f50: ch = CopyToReg 0x2612150, 0x2654460, 0x2652950 0x2612150: <multiple use> 0x2654260: i32 = TargetConstant<2628> [ORD=59] 0x2612150: <multiple 
use> 0x2653f60: i1 = Register %vreg12 [ORD=59] 0x2654060: i1,ch = CopyFromReg 0x2612150, 0x2653f60 [ORD=59] 0x2653a60: ch = llvm.R600.endcf 0x2612150, 0x2654260, 0x2654060 [ORD=59] 0x264f8a0: ch = TokenFactor 0x2652f50, 0x2653a60 0x2653660: ch = BasicBlock<Flow 0x2646098> 0x2653e60: ch = br 0x264f8a0, 0x2653660 Optimized lowered selection DAG: BB#8 'main:ENDIF39' SelectionDAG has 16 nodes: 0x2612150: ch = EntryToken [ORD=59] 0x2612150: <multiple use> 0x2654460: f32 = Register %vreg15 0x2612150: <multiple use> 0x2652c50: f32 = Register %vreg0 [ORD=60] 0x2653450: f32,ch = CopyFromReg 0x2612150, 0x2652c50 [ORD=60] 0x2654360: i32 = bitcast 0x2653450 [ORD=60] 0x2652d50: i32 = Constant<1> [ORD=61] 0x264f6a0: i32 = add 0x2654360, 0x2652d50 [ORD=61] 0x2652950: f32 = bitcast 0x264f6a0 [ORD=62] 0x2652f50: ch = CopyToReg 0x2612150, 0x2654460, 0x2652950 0x2612150: <multiple use> 0x2654260: i32 = TargetConstant<2628> [ORD=59] 0x2612150: <multiple use> 0x2653f60: i1 = Register %vreg12 [ORD=59] 0x2654060: i1,ch = CopyFromReg 0x2612150, 0x2653f60 [ORD=59] 0x2653a60: ch = llvm.R600.endcf 0x2612150, 0x2654260, 0x2654060 [ORD=59] 0x264f8a0: ch = TokenFactor 0x2652f50, 0x2653a60 0x2653660: ch = BasicBlock<Flow 0x2646098> 0x2653e60: ch = br 0x264f8a0, 0x2653660 Type-legalized selection DAG: BB#8 'main:ENDIF39' SelectionDAG has 16 nodes: 0x2612150: ch = EntryToken [ORD=59] [ID=-3] 0x2612150: <multiple use> 0x2654460: f32 = Register %vreg15 [ID=-3] 0x2612150: <multiple use> 0x2652c50: f32 = Register %vreg0 [ORD=60] [ID=-3] 0x2653450: f32,ch = CopyFromReg 0x2612150, 0x2652c50 [ORD=60] [ID=-3] 0x2654360: i32 = bitcast 0x2653450 [ORD=60] [ID=-3] 0x2652d50: i32 = Constant<1> [ORD=61] [ID=-3] 0x264f6a0: i32 = add 0x2654360, 0x2652d50 [ORD=61] [ID=-3] 0x2652950: f32 = bitcast 0x264f6a0 [ORD=62] [ID=-3] 0x2652f50: ch = CopyToReg 0x2612150, 0x2654460, 0x2652950 [ID=-3] 0x2612150: <multiple use> 0x2654260: i32 = TargetConstant<2628> [ORD=59] [ID=-3] 0x2612150: <multiple use> 0x2653f60: i1 = 
Register %vreg12 [ORD=59] [ID=-3] 0x2654060: i1,ch = CopyFromReg 0x2612150, 0x2653f60 [ORD=59] [ID=-3] 0x2653a60: ch = llvm.R600.endcf 0x2612150, 0x2654260, 0x2654060 [ORD=59] [ID=-3] 0x264f8a0: ch = TokenFactor 0x2652f50, 0x2653a60 [ID=-3] 0x2653660: ch = BasicBlock<Flow 0x2646098> [ID=-3] 0x2653e60: ch = br 0x264f8a0, 0x2653660 [ID=-3] Legalized selection DAG: BB#8 'main:ENDIF39' SelectionDAG has 13 nodes: 0x2612150: ch = EntryToken [ORD=59] [ID=0] 0x2612150: <multiple use> 0x2654460: f32 = Register %vreg15 [ID=5] 0x2612150: <multiple use> 0x2652c50: f32 = Register %vreg0 [ORD=60] [ID=3] 0x2653450: f32,ch = CopyFromReg 0x2612150, 0x2652c50 [ORD=60] [ID=8] 0x2654360: i32 = bitcast 0x2653450 [ORD=60] [ID=10] 0x2652d50: i32 = Constant<1> [ORD=61] [ID=4] 0x264f6a0: i32 = add 0x2654360, 0x2652d50 [ORD=61] [ID=11] 0x2652950: f32 = bitcast 0x264f6a0 [ORD=62] [ID=12] 0x2652f50: ch = CopyToReg 0x2612150, 0x2654460, 0x2652950 [ID=13] 0x2612150: <multiple use> 0x264fba0: ch = ENDIF 0x2612150 0x264f8a0: ch = TokenFactor 0x2652f50, 0x264fba0 [ID=14] 0x2653660: ch = BasicBlock<Flow 0x2646098> [ID=6] 0x2653e60: ch = br 0x264f8a0, 0x2653660 [ID=15] Optimized legalized selection DAG: BB#8 'main:ENDIF39' SelectionDAG has 13 nodes: 0x2612150: ch = EntryToken [ORD=59] [ID=0] 0x2612150: <multiple use> 0x2654460: f32 = Register %vreg15 [ID=5] 0x2612150: <multiple use> 0x2652c50: f32 = Register %vreg0 [ORD=60] [ID=3] 0x2653450: f32,ch = CopyFromReg 0x2612150, 0x2652c50 [ORD=60] [ID=8] 0x2654360: i32 = bitcast 0x2653450 [ORD=60] [ID=10] 0x2652d50: i32 = Constant<1> [ORD=61] [ID=4] 0x264f6a0: i32 = add 0x2654360, 0x2652d50 [ORD=61] [ID=11] 0x2652950: f32 = bitcast 0x264f6a0 [ORD=62] [ID=12] 0x2652f50: ch = CopyToReg 0x2612150, 0x2654460, 0x2652950 [ID=13] 0x2612150: <multiple use> 0x264fba0: ch = ENDIF 0x2612150 0x264f8a0: ch = TokenFactor 0x2652f50, 0x264fba0 [ID=14] 0x2653660: ch = BasicBlock<Flow 0x2646098> [ID=6] 0x2653e60: ch = br 0x264f8a0, 0x2653660 [ID=15] ===== Instruction 
selection begins: BB#8 'ENDIF39' ISEL: Starting pattern match on root node: 0x2653e60: ch = br 0x264f8a0, 0x2653660 [ID=12] Initial Opcode index to 17650 Skipped scope entry (due to false predicate) at index 17660, continuing at 17670 Morphed node: 0x2653e60: ch = BRANCH 0x2653660, 0x264f8a0 ISEL: Match complete! ISEL: Starting pattern match on root node: 0x2652950: f32 = bitcast 0x264f6a0 [ORD=62] [ID=9] Initial Opcode index to 10366 TypeSwitch[f32] from 10367 to 10395 ISEL: Match complete! ISEL: Starting pattern match on root node: 0x264f6a0: i32 = add 0x2654360, 0x2652d50 [ORD=61] [ID=8] Initial Opcode index to 10845 Morphed node: 0x264f6a0: i32 = ADD_INT 0x2652950, 0x2652950, 0x2654260, 0x2652950, 0x2652950, 0x2652950, 0x2654360, 0x2652950, 0x2652950, 0x2652950, 0x2653f60, 0x2652d50, 0x2652950, 0x2652950, 0x2652950, 0x2653f60, 0x2654260, 0x2654060, 0x2652950 [ORD=61] ISEL: Match complete! ISEL: Starting pattern match on root node: 0x264fba0: ch = ENDIF 0x2612150 [ID=5] Initial Opcode index to 17637 Morphed node: 0x264fba0: ch = R600_ENDIF 0x2612150 ISEL: Match complete! ISEL: Starting pattern match on root node: 0x2652d50: i32 = Constant<1> [ORD=61] [ID=2] Initial Opcode index to 10223 Skipped scope entry (due to false predicate) at index 10225, continuing at 10238 TypeSwitch[i32] from 10240 to 10259 Skipped scope entry (due to false predicate) at index 10261, continuing at 10285 Morphed node: 0x2652d50: i32 = MOV_IMM_I32 0x2654260 [ORD=61] ISEL: Match complete! 
===== Instruction selection ends: Selected selection DAG: BB#8 'main:ENDIF39' SelectionDAG has 17 nodes: 0x2612150: ch = EntryToken [ORD=59] 0x2654260: <multiple use> 0x2652d50: i32 = MOV_IMM_I32 0x2654260 [ORD=61] 0x2612150: <multiple use> 0x2652c50: f32 = Register %vreg0 [ORD=60] 0x2653450: f32,ch = CopyFromReg 0x2612150, 0x2652c50 [ORD=60] 0x2653450: <multiple use> 0x2654360: i32 = bitcast 0x2653450 [ORD=60] [ID=7] 0x2652950: i32 = TargetConstant<0> 0x2654260: i32 = TargetConstant<1> 0x2653f60: i32 = TargetConstant<-1> 0x2653660: ch = BasicBlock<Flow 0x2646098> 0x2612150: <multiple use> 0x2654460: f32 = Register %vreg15 0x2652950: <multiple use> 0x2652950: <multiple use> 0x2654260: <multiple use> 0x2652950: <multiple use> 0x2652950: <multiple use> 0x2652950: <multiple use> 0x2653450: <multiple use> 0x2652950: <multiple use> 0x2652950: <multiple use> 0x2652950: <multiple use> 0x2653f60: <multiple use> 0x2653a60: i32 = Register %ONE_INT 0x2652950: <multiple use> 0x2652950: <multiple use> 0x2652950: <multiple use> 0x2653f60: <multiple use> 0x2654260: <multiple use> 0x2654060: i32 = Register %PRED_SEL_OFF 0x2652950: <multiple use> 0x264f6a0: i32 = ADD_INT 0x2652950, 0x2652950, 0x2654260, 0x2652950, 0x2652950, 0x2652950, 0x2653450, 0x2652950, 0x2652950, 0x2652950, 0x2653f60, 0x2653a60, 0x2652950, 0x2652950, 0x2652950, 0x2653f60, 0x2654260, 0x2654060, 0x2652950 [ORD=61] 0x2652f50: ch = CopyToReg 0x2612150, 0x2654460, 0x264f6a0 0x2612150: <multiple use> 0x264fba0: ch = R600_ENDIF 0x2612150 0x264f8a0: ch = TokenFactor 0x2652f50, 0x264fba0 0x2653e60: ch = BRANCH 0x2653660, 0x264f8a0 //===----------------------------------------------------------------------===// // Final Machine Code // // The ADD_INT instruction has been pulled inside the branch.
//===----------------------------------------------------------------------===// BB#8: derived from LLVM BB %ENDIF39 Predecessors according to CFG: BB#6 BB#7 %vreg14<def> = PHI %vreg11, <BB#6>, %vreg13, <BB#7>; R600_Reg32:%vreg14,%vreg11,%vreg13 %vreg15<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, -1, %ONE_INT, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg15,%vreg0 R600_ENDIF BRANCH <BB#5> Successors according to CFG: BB#5
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev