@@ -1541,69 +1541,72 @@ define i8 @reduction_and_trunc(i8* noalias nocapture %A) {
1541
1541
; CHECK: vector.body:
1542
1542
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
1543
1543
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1544
- ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 255, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP29 :%.*]], [[PRED_LOAD_CONTINUE6]] ]
1544
+ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 255, [[VECTOR_PH]] ], [ [[TMP34 :%.*]], [[PRED_LOAD_CONTINUE6]] ]
1545
1545
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257>
1546
- ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
1547
- ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1546
+ ; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
1547
+ ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP1]])
1548
+ ; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[VEC_PHI]]
1549
+ ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
1550
+ ; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1548
1551
; CHECK: pred.load.if:
1549
- ; CHECK-NEXT: [[TMP2 :%.*]] = sext i32 [[INDEX]] to i64
1550
- ; CHECK-NEXT: [[TMP3 :%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP2 ]]
1551
- ; CHECK-NEXT: [[TMP4 :%.*]] = load i8, i8* [[TMP3 ]], align 4
1552
- ; CHECK-NEXT: [[TMP5 :%.*]] = insertelement <4 x i8> poison, i8 [[TMP4 ]], i32 0
1552
+ ; CHECK-NEXT: [[TMP5 :%.*]] = sext i32 [[INDEX]] to i64
1553
+ ; CHECK-NEXT: [[TMP6 :%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP5 ]]
1554
+ ; CHECK-NEXT: [[TMP7 :%.*]] = load i8, i8* [[TMP6 ]], align 4
1555
+ ; CHECK-NEXT: [[TMP8 :%.*]] = insertelement <4 x i8> poison, i8 [[TMP7 ]], i32 0
1553
1556
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
1554
1557
; CHECK: pred.load.continue:
1555
- ; CHECK-NEXT: [[TMP6 :%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP5 ]], [[PRED_LOAD_IF]] ]
1556
- ; CHECK-NEXT: [[TMP7 :%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
1557
- ; CHECK-NEXT: br i1 [[TMP7 ]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
1558
+ ; CHECK-NEXT: [[TMP9 :%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP8 ]], [[PRED_LOAD_IF]] ]
1559
+ ; CHECK-NEXT: [[TMP10 :%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
1560
+ ; CHECK-NEXT: br i1 [[TMP10 ]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
1558
1561
; CHECK: pred.load.if1:
1559
- ; CHECK-NEXT: [[TMP8 :%.*]] = or i32 [[INDEX]], 1
1560
- ; CHECK-NEXT: [[TMP9 :%.*]] = sext i32 [[TMP8 ]] to i64
1561
- ; CHECK-NEXT: [[TMP10 :%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP9 ]]
1562
- ; CHECK-NEXT: [[TMP11 :%.*]] = load i8, i8* [[TMP10 ]], align 4
1563
- ; CHECK-NEXT: [[TMP12 :%.*]] = insertelement <4 x i8> [[TMP6 ]], i8 [[TMP11 ]], i32 1
1562
+ ; CHECK-NEXT: [[TMP11 :%.*]] = or i32 [[INDEX]], 1
1563
+ ; CHECK-NEXT: [[TMP12 :%.*]] = sext i32 [[TMP11 ]] to i64
1564
+ ; CHECK-NEXT: [[TMP13 :%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP12 ]]
1565
+ ; CHECK-NEXT: [[TMP14 :%.*]] = load i8, i8* [[TMP13 ]], align 4
1566
+ ; CHECK-NEXT: [[TMP15 :%.*]] = insertelement <4 x i8> [[TMP9 ]], i8 [[TMP14 ]], i32 1
1564
1567
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
1565
1568
; CHECK: pred.load.continue2:
1566
- ; CHECK-NEXT: [[TMP13 :%.*]] = phi <4 x i8> [ [[TMP6 ]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12 ]], [[PRED_LOAD_IF1]] ]
1567
- ; CHECK-NEXT: [[TMP14 :%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
1568
- ; CHECK-NEXT: br i1 [[TMP14 ]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1569
+ ; CHECK-NEXT: [[TMP16 :%.*]] = phi <4 x i8> [ [[TMP9 ]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP15 ]], [[PRED_LOAD_IF1]] ]
1570
+ ; CHECK-NEXT: [[TMP17 :%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
1571
+ ; CHECK-NEXT: br i1 [[TMP17 ]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1569
1572
; CHECK: pred.load.if3:
1570
- ; CHECK-NEXT: [[TMP15 :%.*]] = or i32 [[INDEX]], 2
1571
- ; CHECK-NEXT: [[TMP16 :%.*]] = sext i32 [[TMP15 ]] to i64
1572
- ; CHECK-NEXT: [[TMP17 :%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP16 ]]
1573
- ; CHECK-NEXT: [[TMP18 :%.*]] = load i8, i8* [[TMP17 ]], align 4
1574
- ; CHECK-NEXT: [[TMP19 :%.*]] = insertelement <4 x i8> [[TMP13 ]], i8 [[TMP18 ]], i32 2
1573
+ ; CHECK-NEXT: [[TMP18 :%.*]] = or i32 [[INDEX]], 2
1574
+ ; CHECK-NEXT: [[TMP19 :%.*]] = sext i32 [[TMP18 ]] to i64
1575
+ ; CHECK-NEXT: [[TMP20 :%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP19 ]]
1576
+ ; CHECK-NEXT: [[TMP21 :%.*]] = load i8, i8* [[TMP20 ]], align 4
1577
+ ; CHECK-NEXT: [[TMP22 :%.*]] = insertelement <4 x i8> [[TMP16 ]], i8 [[TMP21 ]], i32 2
1575
1578
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
1576
1579
; CHECK: pred.load.continue4:
1577
- ; CHECK-NEXT: [[TMP20 :%.*]] = phi <4 x i8> [ [[TMP13 ]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP19 ]], [[PRED_LOAD_IF3]] ]
1578
- ; CHECK-NEXT: [[TMP21 :%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
1579
- ; CHECK-NEXT: br i1 [[TMP21 ]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
1580
+ ; CHECK-NEXT: [[TMP23 :%.*]] = phi <4 x i8> [ [[TMP16 ]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP22 ]], [[PRED_LOAD_IF3]] ]
1581
+ ; CHECK-NEXT: [[TMP24 :%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
1582
+ ; CHECK-NEXT: br i1 [[TMP24 ]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
1580
1583
; CHECK: pred.load.if5:
1581
- ; CHECK-NEXT: [[TMP22 :%.*]] = or i32 [[INDEX]], 3
1582
- ; CHECK-NEXT: [[TMP23 :%.*]] = sext i32 [[TMP22 ]] to i64
1583
- ; CHECK-NEXT: [[TMP24 :%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP23 ]]
1584
- ; CHECK-NEXT: [[TMP25 :%.*]] = load i8, i8* [[TMP24 ]], align 4
1585
- ; CHECK-NEXT: [[TMP26 :%.*]] = insertelement <4 x i8> [[TMP20 ]], i8 [[TMP25 ]], i32 3
1584
+ ; CHECK-NEXT: [[TMP25 :%.*]] = or i32 [[INDEX]], 3
1585
+ ; CHECK-NEXT: [[TMP26 :%.*]] = sext i32 [[TMP25 ]] to i64
1586
+ ; CHECK-NEXT: [[TMP27 :%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP26 ]]
1587
+ ; CHECK-NEXT: [[TMP28 :%.*]] = load i8, i8* [[TMP27 ]], align 4
1588
+ ; CHECK-NEXT: [[TMP29 :%.*]] = insertelement <4 x i8> [[TMP23 ]], i8 [[TMP28 ]], i32 3
1586
1589
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
1587
1590
; CHECK: pred.load.continue6:
1588
- ; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i8> [ [[TMP20]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP26]], [[PRED_LOAD_IF5]] ]
1589
- ; CHECK-NEXT: [[TMP28:%.*]] = zext <4 x i8> [[TMP27]] to <4 x i32>
1590
- ; CHECK-NEXT: [[TMP29]] = and <4 x i32> [[VEC_PHI]], [[TMP28]]
1591
+ ; CHECK-NEXT: [[TMP30:%.*]] = phi <4 x i8> [ [[TMP23]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP29]], [[PRED_LOAD_IF5]] ]
1592
+ ; CHECK-NEXT: [[TMP31:%.*]] = zext <4 x i8> [[TMP30]] to <4 x i32>
1593
+ ; CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP31]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
1594
+ ; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP32]])
1595
+ ; CHECK-NEXT: [[TMP34]] = and i32 [[TMP33]], [[TMP3]]
1591
1596
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
1592
1597
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
1593
- ; CHECK-NEXT: [[TMP30 :%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
1594
- ; CHECK-NEXT: br i1 [[TMP30 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
1598
+ ; CHECK-NEXT: [[TMP35 :%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
1599
+ ; CHECK-NEXT: br i1 [[TMP35 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
1595
1600
; CHECK: middle.block:
1596
- ; CHECK-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP29]], <4 x i32> [[VEC_PHI]]
1597
- ; CHECK-NEXT: [[TMP32:%.*]] = trunc <4 x i32> [[TMP31]] to <4 x i8>
1598
- ; CHECK-NEXT: [[TMP33:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> [[TMP32]])
1599
1601
; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
1600
1602
; CHECK: scalar.ph:
1601
1603
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
1602
1604
; CHECK: .lr.ph:
1603
1605
; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP33:![0-9]+]]
1604
1606
; CHECK: ._crit_edge:
1605
- ; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i8 [ undef, [[DOTLR_PH]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
1606
- ; CHECK-NEXT: ret i8 [[SUM_0_LCSSA]]
1607
+ ; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ]
1608
+ ; CHECK-NEXT: [[RET:%.*]] = trunc i32 [[SUM_0_LCSSA]] to i8
1609
+ ; CHECK-NEXT: ret i8 [[RET]]
1607
1610
;
1608
1611
entry:
1609
1612
br label %.lr.ph
0 commit comments