@@ -25,117 +25,119 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) l
; CHECK-NEXT: %{{[0-9]+}} = bitcast <256 x i32>* %{{[0-9]+}} to i8*
; CHECK-NEXT: %tobool.not = icmp eq i32 %cond, 0
; CHECK-NEXT: br i1 %tobool.not, label %if.else, label %if.then
- ; CHECK: if.then: ; preds = %entry
+
+ ; CHECK: if.then:
; CHECK-NEXT: %{{[0-9]+}} = bitcast <16 x i32>* %{{[0-9]+}} to i8*
; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* %{{[0-9]+}}, align 4
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 0
- ; CHECK-NEXT: store volatile i8 1, i8* %{{[0-9]+}}, align 1
+ ; CHECK-NEXT: store i8 1, i8* %{{[0-9]+}}, align 1
; CHECK-NEXT: %amx.tmm.0.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 48
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 16
; CHECK-NEXT: %amx.tmm.0.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
; CHECK-NEXT: %{{[0-9]+}} = trunc i16 %row to i8
- ; CHECK-NEXT: store volatile i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
- ; CHECK-NEXT: store volatile i16 8, i16* %amx.tmm.0.shape.col{{.*}}, align 2
+ ; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
+ ; CHECK-NEXT: store i16 8, i16* %amx.tmm.0.shape.col{{.*}}, align 2
; CHECK-NEXT: call void @llvm.x86.ldtilecfg(i8* %{{[0-9]+}})
; CHECK-NEXT: %{{[0-9]+}} = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32)
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 %row, i16 8, i8* %{{[0-9]+}}, i64 64, x86_amx %{{[0-9]+}})
; CHECK-NEXT: %{{[0-9]+}} = bitcast <16 x i32>* %{{[0-9]+}} to i8*
; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* %{{[0-9]+}}, align 4
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 0
- ; CHECK-NEXT: store volatile i8 1, i8* %{{[0-9]+}}, align 1
+ ; CHECK-NEXT: store i8 1, i8* %{{[0-9]+}}, align 1
; CHECK-NEXT: %amx.tmm.0.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 48
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 16
; CHECK-NEXT: %amx.tmm.0.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
; CHECK-NEXT: %{{[0-9]+}} = trunc i16 8 to i8
- ; CHECK-NEXT: store volatile i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
- ; CHECK-NEXT: store volatile i16 %col, i16* %amx.tmm.0.shape.col{{.*}}, align 2
+ ; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
+ ; CHECK-NEXT: store i16 %col, i16* %amx.tmm.0.shape.col{{.*}}, align 2
; CHECK-NEXT: call void @llvm.x86.ldtilecfg(i8* %{{[0-9]+}})
; CHECK-NEXT: %{{[0-9]+}} = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32)
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 8, i16 %col, i8* %{{[0-9]+}}, i64 64, x86_amx %{{[0-9]+}})
; CHECK-NEXT: %{{[0-9]+}} = bitcast <16 x i32>* %{{[0-9]+}} to i8*
; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* %{{[0-9]+}}, align 4
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 0
- ; CHECK-NEXT: store volatile i8 1, i8* %{{[0-9]+}}, align 1
+ ; CHECK-NEXT: store i8 1, i8* %{{[0-9]+}}, align 1
; CHECK-NEXT: %amx.tmm.0.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 48
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 16
; CHECK-NEXT: %amx.tmm.0.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
; CHECK-NEXT: %{{[0-9]+}} = trunc i16 %row to i8
- ; CHECK-NEXT: store volatile i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
- ; CHECK-NEXT: store volatile i16 %col, i16* %amx.tmm.0.shape.col{{.*}}, align 2
+ ; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
+ ; CHECK-NEXT: store i16 %col, i16* %amx.tmm.0.shape.col{{.*}}, align 2
; CHECK-NEXT: call void @llvm.x86.ldtilecfg(i8* %{{[0-9]+}})
; CHECK-NEXT: %{{[0-9]+}} = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32)
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* %{{[0-9]+}}, i64 64, x86_amx %{{[0-9]+}})
; CHECK-NEXT: br label %if.end
- ; CHECK: if.else:
+
+ ; CHECK: if.else:
; CHECK-NEXT: %{{[0-9]+}} = bitcast <16 x i32>* %{{[0-9]+}} to i8*
; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* %{{[0-9]+}}, align 4
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 0
- ; CHECK-NEXT: store volatile i8 1, i8* %{{[0-9]+}}, align 1
+ ; CHECK-NEXT: store i8 1, i8* %{{[0-9]+}}, align 1
; CHECK-NEXT: %amx.tmm.0.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 48
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 16
; CHECK-NEXT: %amx.tmm.0.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
; CHECK-NEXT: %{{[0-9]+}} = trunc i16 %row to i8
- ; CHECK-NEXT: store volatile i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
- ; CHECK-NEXT: store volatile i16 8, i16* %amx.tmm.0.shape.col{{.*}}, align 2
+ ; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
+ ; CHECK-NEXT: store i16 8, i16* %amx.tmm.0.shape.col{{.*}}, align 2
; CHECK-NEXT: call void @llvm.x86.ldtilecfg(i8* %{{[0-9]+}})
; CHECK-NEXT: %{{[0-9]+}} = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32)
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 %row, i16 8, i8* %{{[0-9]+}}, i64 64, x86_amx %{{[0-9]+}})
; CHECK-NEXT: %{{[0-9]+}} = bitcast <16 x i32>* %{{[0-9]+}} to i8*
; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* %{{[0-9]+}}, align 4
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 0
- ; CHECK-NEXT: store volatile i8 1, i8* %{{[0-9]+}}, align 1
+ ; CHECK-NEXT: store i8 1, i8* %{{[0-9]+}}, align 1
; CHECK-NEXT: %amx.tmm.0.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 48
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 16
; CHECK-NEXT: %amx.tmm.0.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
; CHECK-NEXT: %{{[0-9]+}} = trunc i16 8 to i8
- ; CHECK-NEXT: store volatile i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
- ; CHECK-NEXT: store volatile i16 %col, i16* %amx.tmm.0.shape.col{{.*}}, align 2
+ ; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
+ ; CHECK-NEXT: store i16 %col, i16* %amx.tmm.0.shape.col{{.*}}, align 2
; CHECK-NEXT: call void @llvm.x86.ldtilecfg(i8* %{{[0-9]+}})
; CHECK-NEXT: %{{[0-9]+}} = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32)
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 8, i16 %col, i8* %{{[0-9]+}}, i64 64, x86_amx %{{[0-9]+}})
; CHECK-NEXT: %{{[0-9]+}} = bitcast <16 x i32>* %{{[0-9]+}} to i8*
; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* %{{[0-9]+}}, align 4
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 0
- ; CHECK-NEXT: store volatile i8 1, i8* %{{[0-9]+}}, align 1
+ ; CHECK-NEXT: store i8 1, i8* %{{[0-9]+}}, align 1
; CHECK-NEXT: %amx.tmm.0.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 48
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 16
; CHECK-NEXT: %amx.tmm.0.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
; CHECK-NEXT: %{{[0-9]+}} = trunc i16 %row to i8
- ; CHECK-NEXT: store volatile i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
- ; CHECK-NEXT: store volatile i16 %col, i16* %amx.tmm.0.shape.col{{.*}}, align 2
+ ; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
+ ; CHECK-NEXT: store i16 %col, i16* %amx.tmm.0.shape.col{{.*}}, align 2
; CHECK-NEXT: call void @llvm.x86.ldtilecfg(i8* %{{[0-9]+}})
; CHECK-NEXT: %{{[0-9]+}} = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32)
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* %{{[0-9]+}}, i64 64, x86_amx %{{[0-9]+}})
; CHECK-NEXT: br label %if.end
- ; CHECK: if.end: ; preds = %if.else, %if.then
+ ; CHECK: if.end: ; preds = %if.else, %if.then
; CHECK-NEXT: %{{[0-9]+}} = bitcast <16 x i32>* %{{[0-9]+}} to i8*
; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* %{{[0-9]+}}, align 4
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 0
- ; CHECK-NEXT: store volatile i8 1, i8* %{{[0-9]+}}, align 1
+ ; CHECK-NEXT: store i8 1, i8* %{{[0-9]+}}, align 1
; CHECK-NEXT: %amx.tmm.0.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 48
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 16
; CHECK-NEXT: %amx.tmm.0.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
; CHECK-NEXT: %{{[0-9]+}} = trunc i16 %row to i8
- ; CHECK-NEXT: store volatile i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
- ; CHECK-NEXT: store volatile i16 %col, i16* %amx.tmm.0.shape.col{{.*}}, align 2
+ ; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
+ ; CHECK-NEXT: store i16 %col, i16* %amx.tmm.0.shape.col{{.*}}, align 2
; CHECK-NEXT: %amx.tmm.1.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 49
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 18
; CHECK-NEXT: %amx.tmm.1.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
; CHECK-NEXT: %{{[0-9]+}} = trunc i16 %row to i8
- ; CHECK-NEXT: store volatile i8 %{{[0-9]+}}, i8* %amx.tmm.1.shape.row{{.*}}, align 1
- ; CHECK-NEXT: store volatile i16 8, i16* %amx.tmm.1.shape.col{{.*}}, align 2
+ ; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.1.shape.row{{.*}}, align 1
+ ; CHECK-NEXT: store i16 8, i16* %amx.tmm.1.shape.col{{.*}}, align 2
; CHECK-NEXT: %amx.tmm.2.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 50
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 20
; CHECK-NEXT: %amx.tmm.2.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
; CHECK-NEXT: %{{[0-9]+}} = trunc i16 8 to i8
- ; CHECK-NEXT: store volatile i8 %{{[0-9]+}}, i8* %amx.tmm.2.shape.row{{.*}}, align 1
- ; CHECK-NEXT: store volatile i16 %col, i16* %amx.tmm.2.shape.col{{.*}}, align 2
+ ; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.2.shape.row{{.*}}, align 1
+ ; CHECK-NEXT: store i16 %col, i16* %amx.tmm.2.shape.col{{.*}}, align 2
; CHECK-NEXT: %amx.tmm.3.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 51
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 22
; CHECK-NEXT: %amx.tmm.3.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
; CHECK-NEXT: %{{[0-9]+}} = trunc i16 %row to i8
- ; CHECK-NEXT: store volatile i8 %{{[0-9]+}}, i8* %amx.tmm.3.shape.row{{.*}}, align 1
- ; CHECK-NEXT: store volatile i16 %col, i16* %amx.tmm.3.shape.col{{.*}}, align 2
+ ; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.3.shape.row{{.*}}, align 1
+ ; CHECK-NEXT: store i16 %col, i16* %amx.tmm.3.shape.col{{.*}}, align 2
; CHECK-NEXT: call void @llvm.x86.ldtilecfg(i8* %{{[0-9]+}})
; CHECK-NEXT: %{{[0-9]+}} = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, i8* %{{[0-9]+}}, i64 64)
; CHECK-NEXT: %{{[0-9]+}} = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, i8* %{{[0-9]+}}, i64 64)
@@ -145,13 +147,13 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) l
; CHECK-NEXT: %{{[0-9]+}} = bitcast <16 x i32>* %{{[0-9]+}} to i8*
; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* %{{[0-9]+}}, align 4
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 0
- ; CHECK-NEXT: store volatile i8 1, i8* %{{[0-9]+}}, align 1
+ ; CHECK-NEXT: store i8 1, i8* %{{[0-9]+}}, align 1
; CHECK-NEXT: %amx.tmm.0.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 48
; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 16
; CHECK-NEXT: %amx.tmm.0.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
; CHECK-NEXT: %{{[0-9]+}} = trunc i16 %row to i8
- ; CHECK-NEXT: store volatile i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
- ; CHECK-NEXT: store volatile i16 %col, i16* %amx.tmm.0.shape.col{{.*}}, align 2
+ ; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
+ ; CHECK-NEXT: store i16 %col, i16* %amx.tmm.0.shape.col{{.*}}, align 2
; CHECK-NEXT: call void @llvm.x86.ldtilecfg(i8* %{{[0-9]+}})
; CHECK-NEXT: %{{[0-9]+}} = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, i8* %{{[0-9]+}}, i64 64)
; CHECK-NEXT: tail call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32, x86_amx %{{[0-9]+}})