@@ -26,9 +26,10 @@ define dso_local void @run_test() local_unnamed_addr #0 {
2626; CHECK-NEXT: adrp x11, A
2727; CHECK-NEXT: mov x8, xzr
2828; CHECK-NEXT: mov x9, xzr
29- ; CHECK-NEXT: movi v14 .2d, #0000000000000000
29+ ; CHECK-NEXT: movi v0 .2d, #0000000000000000
3030; CHECK-NEXT: add x10, x10, :lo12:B+48
3131; CHECK-NEXT: add x11, x11, :lo12:A
32+ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
3233; CHECK-NEXT: // implicit-def: $q1
3334; CHECK-NEXT: // implicit-def: $q2
3435; CHECK-NEXT: // implicit-def: $q3
@@ -47,8 +48,8 @@ define dso_local void @run_test() local_unnamed_addr #0 {
4748; CHECK-NEXT: // implicit-def: $q24
4849; CHECK-NEXT: // implicit-def: $q25
4950; CHECK-NEXT: // implicit-def: $q26
50- ; CHECK-NEXT: // implicit-def: $q28
5151; CHECK-NEXT: // implicit-def: $q27
52+ ; CHECK-NEXT: // implicit-def: $q28
5253; CHECK-NEXT: // implicit-def: $q29
5354; CHECK-NEXT: // implicit-def: $q30
5455; CHECK-NEXT: // implicit-def: $q31
@@ -60,78 +61,20 @@ define dso_local void @run_test() local_unnamed_addr #0 {
6061; CHECK-NEXT: // implicit-def: $q13
6162; CHECK-NEXT: .LBB0_1: // %for.cond1.preheader
6263; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
63- ; CHECK-NEXT: str q13, [sp] // 16-byte Folded Spill
6464; CHECK-NEXT: mov x12, xzr
6565; CHECK-NEXT: ldr q15, [x8]
66- ; CHECK-NEXT: mov v13.16b, v12.16b
67- ; CHECK-NEXT: mov v12.16b, v11.16b
68- ; CHECK-NEXT: mov v11.16b, v10.16b
69- ; CHECK-NEXT: mov v10.16b, v9.16b
70- ; CHECK-NEXT: mov v9.16b, v8.16b
71- ; CHECK-NEXT: mov v8.16b, v31.16b
72- ; CHECK-NEXT: mov v31.16b, v30.16b
73- ; CHECK-NEXT: mov v30.16b, v29.16b
74- ; CHECK-NEXT: mov v29.16b, v27.16b
75- ; CHECK-NEXT: mov v27.16b, v26.16b
76- ; CHECK-NEXT: mov v26.16b, v25.16b
77- ; CHECK-NEXT: mov v25.16b, v24.16b
78- ; CHECK-NEXT: mov v24.16b, v23.16b
79- ; CHECK-NEXT: mov v23.16b, v22.16b
80- ; CHECK-NEXT: mov v22.16b, v21.16b
81- ; CHECK-NEXT: mov v21.16b, v20.16b
82- ; CHECK-NEXT: mov v20.16b, v19.16b
83- ; CHECK-NEXT: mov v19.16b, v18.16b
84- ; CHECK-NEXT: mov v18.16b, v17.16b
85- ; CHECK-NEXT: mov v17.16b, v16.16b
86- ; CHECK-NEXT: mov v16.16b, v7.16b
87- ; CHECK-NEXT: mov v7.16b, v6.16b
88- ; CHECK-NEXT: mov v6.16b, v5.16b
89- ; CHECK-NEXT: mov v5.16b, v4.16b
90- ; CHECK-NEXT: mov v4.16b, v3.16b
91- ; CHECK-NEXT: mov v3.16b, v2.16b
92- ; CHECK-NEXT: mov v2.16b, v1.16b
93- ; CHECK-NEXT: mov v1.16b, v14.16b
9466; CHECK-NEXT: ldr q14, [x12]
9567; CHECK-NEXT: ldr q0, [x10], #64
9668; CHECK-NEXT: ldr x18, [x12]
9769; CHECK-NEXT: fmov x15, d15
9870; CHECK-NEXT: mov x14, v15.d[1]
99- ; CHECK-NEXT: mov x12, v14.d[1]
100- ; CHECK-NEXT: mul x1, x15, x18
10171; CHECK-NEXT: fmov x13, d14
102- ; CHECK-NEXT: mov v14.16b, v1.16b
103- ; CHECK-NEXT: mov v1.16b, v2.16b
104- ; CHECK-NEXT: mov v2.16b, v3.16b
105- ; CHECK-NEXT: mov v3.16b, v4.16b
106- ; CHECK-NEXT: mov v4.16b, v5.16b
107- ; CHECK-NEXT: mov v5.16b, v6.16b
108- ; CHECK-NEXT: mov v6.16b, v7.16b
109- ; CHECK-NEXT: mov v7.16b, v16.16b
110- ; CHECK-NEXT: mov v16.16b, v17.16b
111- ; CHECK-NEXT: mov v17.16b, v18.16b
112- ; CHECK-NEXT: mov v18.16b, v19.16b
113- ; CHECK-NEXT: mov v19.16b, v20.16b
114- ; CHECK-NEXT: mov v20.16b, v21.16b
115- ; CHECK-NEXT: mov v21.16b, v22.16b
116- ; CHECK-NEXT: mov v22.16b, v23.16b
117- ; CHECK-NEXT: mov v23.16b, v24.16b
118- ; CHECK-NEXT: mov v24.16b, v25.16b
119- ; CHECK-NEXT: mov v25.16b, v26.16b
120- ; CHECK-NEXT: mov v26.16b, v27.16b
121- ; CHECK-NEXT: mov v27.16b, v29.16b
122- ; CHECK-NEXT: mov v29.16b, v30.16b
123- ; CHECK-NEXT: mov v30.16b, v31.16b
124- ; CHECK-NEXT: mov v31.16b, v8.16b
125- ; CHECK-NEXT: mov v8.16b, v9.16b
126- ; CHECK-NEXT: mov v9.16b, v10.16b
127- ; CHECK-NEXT: mov v10.16b, v11.16b
128- ; CHECK-NEXT: mov v11.16b, v12.16b
129- ; CHECK-NEXT: mov v12.16b, v13.16b
130- ; CHECK-NEXT: ldr q13, [sp] // 16-byte Folded Reload
72+ ; CHECK-NEXT: mul x1, x15, x18
13173; CHECK-NEXT: mov x16, v0.d[1]
13274; CHECK-NEXT: fmov x17, d0
13375; CHECK-NEXT: fmov d0, x1
13476; CHECK-NEXT: mul x1, x14, x18
77+ ; CHECK-NEXT: mov x12, v14.d[1]
13578; CHECK-NEXT: ldr x0, [x8]
13679; CHECK-NEXT: mov v0.d[1], x1
13780; CHECK-NEXT: mul x1, x13, x18
@@ -144,6 +87,7 @@ define dso_local void @run_test() local_unnamed_addr #0 {
14487; CHECK-NEXT: add v11.2d, v11.2d, v0.2d
14588; CHECK-NEXT: fmov d0, x1
14689; CHECK-NEXT: mul x18, x16, x18
90+ ; CHECK-NEXT: ldr q14, [sp] // 16-byte Folded Reload
14791; CHECK-NEXT: mov v0.d[1], x18
14892; CHECK-NEXT: mul x18, x15, x0
14993; CHECK-NEXT: add x1, x11, x8
@@ -194,28 +138,29 @@ define dso_local void @run_test() local_unnamed_addr #0 {
194138; CHECK-NEXT: mul x17, x17, x1
195139; CHECK-NEXT: mov v0.d[1], x12
196140; CHECK-NEXT: mul x16, x16, x1
197- ; CHECK-NEXT: add v27 .2d, v27 .2d, v0.2d
141+ ; CHECK-NEXT: add v28 .2d, v28 .2d, v0.2d
198142; CHECK-NEXT: fmov d0, x17
199143; CHECK-NEXT: mov v0.d[1], x16
200144; CHECK-NEXT: add x8, x8, #8 // =8
201- ; CHECK-NEXT: add v28 .2d, v28 .2d, v0.2d
145+ ; CHECK-NEXT: add v27 .2d, v27 .2d, v0.2d
202146; CHECK-NEXT: cmp x8, #64 // =64
203147; CHECK-NEXT: add x9, x9, #1 // =1
148+ ; CHECK-NEXT: str q14, [sp] // 16-byte Folded Spill
204149; CHECK-NEXT: b.ne .LBB0_1
205150; CHECK-NEXT: // %bb.2: // %for.cond.cleanup
206151; CHECK-NEXT: adrp x8, C
207152; CHECK-NEXT: add x8, x8, :lo12:C
153+ ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
208154; CHECK-NEXT: stp q13, q12, [x8]
209155; CHECK-NEXT: stp q11, q10, [x8, #32]
210156; CHECK-NEXT: stp q9, q8, [x8, #64]
211- ; CHECK-NEXT: stp q14, q2, [x8, #464]
212157; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
213158; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
214159; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
215160; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
216161; CHECK-NEXT: stp q31, q30, [x8, #96]
217- ; CHECK-NEXT: stp q29, q27 , [x8, #144]
218- ; CHECK-NEXT: stp q28 , q26, [x8, #176]
162+ ; CHECK-NEXT: stp q29, q28 , [x8, #144]
163+ ; CHECK-NEXT: stp q27 , q26, [x8, #176]
219164; CHECK-NEXT: str q25, [x8, #208]
220165; CHECK-NEXT: stp q24, q23, [x8, #240]
221166; CHECK-NEXT: stp q22, q21, [x8, #272]
@@ -224,6 +169,7 @@ define dso_local void @run_test() local_unnamed_addr #0 {
224169; CHECK-NEXT: stp q16, q7, [x8, #368]
225170; CHECK-NEXT: stp q6, q5, [x8, #400]
226171; CHECK-NEXT: stp q4, q3, [x8, #432]
172+ ; CHECK-NEXT: stp q0, q2, [x8, #464]
227173; CHECK-NEXT: str q1, [x8, #496]
228174; CHECK-NEXT: add sp, sp, #80 // =80
229175; CHECK-NEXT: ret
0 commit comments