@@ -504,24 +504,35 @@ exit:
504504define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_size_nofree_via_context (ptr noalias %p1 , ptr noalias %p2 ) nosync {
505505; CHECK-LABEL: define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_size_nofree_via_context(
506506; CHECK-SAME: ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) #[[ATTR1:[0-9]+]] {
507- ; CHECK-NEXT: [[ENTRY:.*]]:
507+ ; CHECK-NEXT: [[ENTRY:.*:]]
508508; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P1]], i64 4), "dereferenceable"(ptr [[P1]], i64 1024) ]
509509; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 4), "dereferenceable"(ptr [[P2]], i64 1024) ]
510- ; CHECK-NEXT: br label %[[LOOP:.*]]
511- ; CHECK: [[LOOP]]:
512- ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 0, %[[ENTRY]] ]
510+ ; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
511+ ; CHECK: [[VECTOR_PH]]:
512+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
513+ ; CHECK: [[VECTOR_BODY]]:
514+ ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], %[[VECTOR_BODY]] ]
513515; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]]
514- ; CHECK-NEXT: [[LD1 :%.*]] = load i8 , ptr [[ARRAYIDX2]], align 1
516+ ; CHECK-NEXT: [[WIDE_LOAD :%.*]] = load <4 x i8> , ptr [[ARRAYIDX2]], align 1
515517; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX1]]
516- ; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[TMP1]], align 1
517- ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
518- ; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
519- ; CHECK: [[LOOP_INC]]:
520- ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX1]], 1
521- ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 1024
522- ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
518+ ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
519+ ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
520+ ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
521+ ; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]]
522+ ; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
523+ ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024
524+ ; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
525+ ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
526+ ; CHECK: [[MIDDLE_SPLIT]]:
527+ ; CHECK-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
528+ ; CHECK: [[MIDDLE_BLOCK]]:
529+ ; CHECK-NEXT: br label %[[LOOP_END:.*]]
530+ ; CHECK: [[VECTOR_EARLY_EXIT]]:
531+ ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 true)
532+ ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[TMP7]]
533+ ; CHECK-NEXT: br label %[[LOOP_END]]
523534; CHECK: [[LOOP_END]]:
524- ; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX1]] , %[[LOOP ]] ], [ -1 , %[[LOOP_INC ]] ]
535+ ; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ -1 , %[[MIDDLE_BLOCK ]] ], [ [[TMP8]] , %[[VECTOR_EARLY_EXIT ]] ]
525536; CHECK-NEXT: ret i64 [[RETVAL]]
526537;
527538entry:
0 commit comments