/*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*/
/**
* \file
* \brief directives.h - define macros for asm directives
*/
#if ! defined (__ASSEMBLER__)
#define __ASSEMBLER__
#endif /* ! defined (__ASSEMBLER__) */
#define _ASM_CONCAT(l,r) l##r
#define ASM_CONCAT(l,r) _ASM_CONCAT(l,r)
#define _ASM_CONCAT3(l,m,r) l##m##r
#define ASM_CONCAT3(l,m,r) _ASM_CONCAT3(l,m,r)
#define _ASM_STRINGIFY(s) #s
#define ASM_STRINGIFY(s) _ASM_STRINGIFY(s)
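/*
* Example (illustrative): the two-level definitions expand their
* arguments before pasting or stringizing. Given a hypothetical
*     #define WIDTH 4
* then
*     ASM_CONCAT(vec,WIDTH)   -> vec4
*     _ASM_CONCAT(vec,WIDTH)  -> vecWIDTH
*     ASM_STRINGIFY(WIDTH)    -> "4"
*     _ASM_STRINGIFY(WIDTH)   -> "WIDTH"
*/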
#if defined(TARGET_WIN_X8664)
#define ENT(n) n
#define ALN_WORD .align 4
#define ALN_FUNC .align 16
#define ALN_DBLE .align 8
#define ALN_QUAD .align 16
#if defined(__clang__)
/*
* https://stackoverflow.com/questions/1317081/gccs-assembly-output-of-an-empty-program-on-x86-win32
* Debugging info for function 's'
* .scl 2: storage class external(2)
* .type 32: symbol is a function
*/
#define ELF_FUNC(s) .def s; .scl 2; .type 32; .endef
#define ELF_OBJ(s)
#define ELF_SIZE(s)
#else
#define ELF_FUNC(s) .type ENT(s), @function
#define ELF_OBJ(s) .type ENT(s), @object
#define ELF_SIZE(s) .size ENT(s), .- ENT(s)
#endif
#define ELF_HIDDEN(s)
#define AS_VER .version "01.01"
#define I1 %rcx
#define I1W %ecx
#define I2 %rdx
#define I2W %edx
#define I3 %r8
#define I3W %r8d
#define I4 %r9
#define I4W %r9d
#define F1 %xmm0
#define F2 %xmm1
#define F3 %xmm2
#define F4 %xmm3
#elif defined(LINUX_ELF) || defined(TARGET_LINUX_X86) || defined(TARGET_LINUX_X8664)
/*
* For X86-64 ELF enabled objects, disable stack execute bit.
*
* Assume that this file is one of the first include files listed in assembly
* source files that need preprocessing.
*/
#if ! defined(NOTE_GNU_STACK)
# define NOTE_GNU_STACK
.section .note.GNU-stack,"",%progbits
.text
#endif // #if ! defined(NOTE_GNU_STACK)
#define ENT(n) n
#define ALN_WORD .align 4
#define ALN_FUNC .align 16
#define ALN_DBLE .align 8
#define ALN_QUAD .align 16
#define ELF_FUNC(s) .type ENT(s), @function
#define ELF_OBJ(s) .type ENT(s), @object
#define ELF_SIZE(s) .size ENT(s), .- ENT(s)
#define ELF_HIDDEN(s) .globl s ; .hidden s
#define AS_VER .version "01.01"
#define I1 %rdi
#define I1W %edi
#define I2 %rsi
#define I2W %esi
#define I3 %rdx
#define I3W %edx
#define I4 %rcx
#define I4W %ecx
#define F1 %xmm0
#define F2 %xmm1
#define F3 %xmm2
#define F4 %xmm3
#elif defined(TARGET_OSX_X8664)
#define ENT(n) ASM_CONCAT(_,n)
#define ALN_WORD .align 2
#define ALN_FUNC .align 4
#define ALN_DBLE .align 3
#define ALN_QUAD .align 4
#define ELF_FUNC(s)
#define ELF_OBJ(s)
#define ELF_SIZE(s)
#define ELF_HIDDEN(s)
#define AS_VER
#define I1 %rdi
#define I1W %edi
#define I2 %rsi
#define I2W %esi
#define I3 %rdx
#define I3W %edx
#define I4 %rcx
#define I4W %ecx
#define F1 %xmm0
#define F2 %xmm1
#define F3 %xmm2
#define F4 %xmm3
#else
#error X8664 TARGET platform not defined.
#error TARGET must be one of TARGET_LINUX_X8664, TARGET_OSX_X8664, or TARGET_WIN_X8664.
#endif
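/*
* Usage sketch (illustrative; "my_func" is a hypothetical symbol): the
* macros above allow one source to assemble on all three targets. ENT()
* adds the leading underscore OSX requires, ELF_FUNC/ELF_SIZE emit
* symbol metadata only where the object format supports it, and I1/F1
* name the first integer/floating point argument registers of the
* target calling convention.
*/
#if 0
        .text
        ALN_FUNC
        .globl ENT(my_func)
        ELF_FUNC(my_func)
ENT(my_func):                   /* double my_func(double *p) { return *p + *p; } */
        movsd (I1), F1          /* I1 is %rdi on Linux/OSX, %rcx on Windows */
        addsd F1, F1            /* result is returned in F1 (%xmm0) */
        ret
        ELF_SIZE(my_func)
#endif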
/* macros for handling pic and non-pic code */
#if defined(PG_PIC) && ! defined (TARGET_OSX_X8664)
#define GBLTXT(fn) fn@PLT
#define LDL(var, tmp, lreg) \
        leaq var(%rip), tmp; \
        movl (tmp), lreg
#define STL(lreg, tmp, var) \
        leaq var(%rip), tmp; \
        movl lreg, (tmp)
#define LDQ(var, tmp, qreg) \
        leaq var(%rip), tmp; \
        movq (tmp), qreg
#define STQ(qreg, tmp, var) \
        leaq var(%rip), tmp; \
        movq qreg, (tmp)
#define LDDQU(var, tmp, qreg) \
        leaq var(%rip), tmp; \
        movdqu (tmp), qreg
#define STDQU(qreg, tmp, var) \
        leaq var(%rip), tmp; \
        movdqu qreg, (tmp)
#define LEAQ(var, tmp) leaq var(%rip), tmp
#define XCHGL(lreg, tmp, var) \
        leaq var(%rip), tmp; \
        xchgl lreg, (tmp)
#define FNSTCW(tmp, var) \
        leaq var(%rip), tmp; \
        fnstcw (tmp)
#define FLDCW(var, tmp) \
        leaq var(%rip), tmp; \
        fldcw (tmp)
#else
#define GBLTXT(fn) fn
#define LDL(var, tmp, lreg) movl var(%rip), lreg
#define STL(lreg, tmp, var) movl lreg, var(%rip)
#define LDQ(var, tmp, qreg) movq var(%rip), qreg
#define STQ(qreg, tmp, var) movq qreg, var(%rip)
#define LDDQU(var, tmp, qreg) movdqu var(%rip), qreg
#define STDQU(qreg, tmp, var) movdqu qreg, var(%rip)
#define LEAQ(var, tmp) leaq var(%rip), tmp
#define XCHGL(lreg, tmp, var) xchgl lreg, var(%rip)
#define FNSTCW(tmp, var) fnstcw var(%rip)
#define FLDCW(var, tmp) fldcw var(%rip)
#endif
#define CALL(fn) call GBLTXT(fn)
#define JMP(fn) jmp GBLTXT(fn)
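/*
* Expansion example (illustrative; "flag" and "foo" are hypothetical
* symbols): with PG_PIC defined (and not OSX),
*     LDL(flag, %rax, %ecx) -> leaq flag(%rip), %rax; movl (%rax), %ecx
*     CALL(foo)             -> call foo@PLT
* while the non-PIC variants collapse to direct rip-relative accesses
* and the tmp register goes unused:
*     LDL(flag, %rax, %ecx) -> movl flag(%rip), %ecx
*     CALL(foo)             -> call foo
*/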
/* macros for handling the red zone of a stack, i.e., the 128-byte area
* below a leaf function's stack pointer as defined in the Linux ABI.
* For other environments, the red zone must be allocated explicitly.
* Macros:
* RZ_PUSH -- create the 128-byte red zone
* RZ_POP -- relinquish the 128-byte red zone
* RZ_OFF(n) -- produce the offset representing n (positive) bytes below
* the stack pointer w.r.t. the Linux ABI; e.g., for the Linux ABI
* RZ_OFF(24)(%rsp) => -24(%rsp); for other systems, the
* offset would be computed as 128-24.
*/
#if defined(TARGET_WIN_X8664)
#define RZ_PUSH subq $128, %rsp
#define RZ_POP addq $128, %rsp
#define RZ_OFF(n) (128## - n)
#else
#define RZ_PUSH
#define RZ_POP
#define RZ_OFF(n) -##n
#endif
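/*
* Usage example (illustrative): spilling a register into the red zone
* from a leaf routine.
*     RZ_PUSH
*     movsd %xmm6, RZ_OFF(8)(%rsp)
*     ...
*     movsd RZ_OFF(8)(%rsp), %xmm6
*     RZ_POP
* On Linux and OSX this is a plain movsd %xmm6, -8(%rsp) with no stack
* adjustment, since those ABIs already provide the red zone; on Windows,
* which has no red zone, the area is allocated with subq $128, %rsp and
* the slot becomes 120(%rsp), i.e. 128-8 above the adjusted pointer.
*/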
#if defined(GH_TARGET)
#define LDDP movsd
#else
#define LDDP movlpd
#endif
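/*
* Both forms load a 64-bit double into the low half of an XMM register;
* movsd from memory zeroes the upper 64 bits of the destination, while
* movlpd merges, leaving them unchanged. The choice matters only when
* the high half of the register is live.
*/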
#ifdef LBL__
#error Macro "LBL__" should not be defined
#endif
#define LBL__(_n,_t) _n ## _ ## _t
#ifdef LBL_
#error Macro "LBL_" should not be defined
#endif
#define LBL_(_n, _m) LBL__(_n, _m)
#ifdef LBL
#error Macro "LBL" should not be defined
#endif
#define LBL(_n) LBL_(_n, NNN)
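/*
* Example (illustrative): LBL derives unique label names from the
* caller-supplied macro NNN, so a body included more than once can keep
* its local labels distinct. With
*     #define NNN 2
* LBL(loop) expands to loop_2; a second inclusion with NNN defined as 3
* would produce loop_3 instead.
*/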
/*
* Define a set of generic vex prefixed floating point instructions
* (macros) that can be used for both Intel 3 operand and AMD's 4
* operand instruction sets.
*
* Conform to Intel's instruction notation.
*
* Instructions (macros) use the C preprocessor and not the ".macro"
* facility builtin with most assemblers because of the inconsistent
* syntax and availability of pseudo ops across various platforms.
*
* Selection between FMA3 and FMA4 (AMD only) instructions is controlled
* by macro "VFMA_IS_FMA3_OR_FMA4" which must be defined to be either
* "FMA3" or "FMA4".
*
* Map:
* Instruction "Pseudo OP" (CPP macro)
* vfmadd{132,213,231}{ss,sd,ps,pd} VFMA_{132,213,231}{SS,SD,PS,PD}
* vfnmadd{132,213,231}{ss,sd,ps,pd} VFNMA_{132,213,231}{SS,SD,PS,PD}
* vfmsub{132,213,231}{ss,sd,ps,pd} VFMS_{132,213,231}{SS,SD,PS,PD}
* vfnmsub{132,213,231}{ss,sd,ps,pd} VFNMS_{132,213,231}{SS,SD,PS,PD}
*/
#define VFMA_FMA4_ORDER_132(src_rm,src_r,dst) src_r,src_rm,dst,dst
#define VFMA_FMA4_ORDER_213(src_rm,src_r,dst) src_rm,src_r,dst,dst
#define VFMA_FMA4_ORDER_231(src_rm,src_r,dst) dst,src_rm,src_r,dst
#define IF_VFMA_IS_FMA3(op,order,size,src_rm,src_r,dst) \
        ASM_CONCAT3(op,order,size) src_rm,src_r,dst
#define IF_VFMA_IS_FMA4(op,order,size,src_rm,src_r,dst) \
        ASM_CONCAT(op,size) ASM_CONCAT(VFMA_FMA4_ORDER_,order)(src_rm,src_r,dst)
#define VFMA_FMA3_OR_FMA4(op,order,size,src_rm,src_r,dst) \
        ASM_CONCAT(IF_VFMA_IS_,VFMA_IS_FMA3_OR_FMA4)(op,order,size,src_rm,src_r,dst)
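/*
* Expansion example (illustrative): with
*     #define VFMA_IS_FMA3_OR_FMA4 FMA3
* the invocation
*     VFMA_FMA3_OR_FMA4(vfmadd,213,sd,%xmm0,%xmm1,%xmm2)
* dispatches to IF_VFMA_IS_FMA3 and emits the Intel three-operand form
*     vfmadd213sd %xmm0,%xmm1,%xmm2
* while defining the selector as FMA4 dispatches to IF_VFMA_IS_FMA4 and
* emits the AMD four-operand form
*     vfmaddsd %xmm0,%xmm1,%xmm2,%xmm2
*/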
/*
* A couple of true/false (1/0) macros to use to determine whether FMA3 or
* FMA4 is being targeted.
*
* Note: Macro "VFMA_IS_FMA3_OR_FMA4" must be defined.
*/
#define VFMA_IS_FMA3_(l,r) l
#define VFMA_IS_FMA4_(l,r) r
#define VFMA_IS_FMA3 ASM_CONCAT3(VFMA_IS_,VFMA_IS_FMA3_OR_FMA4,_)(1,0)
#define VFMA_IS_FMA4 ASM_CONCAT3(VFMA_IS_,VFMA_IS_FMA3_OR_FMA4,_)(0,1)
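/*
* Example (illustrative): the 1/0 forms can gate whole sequences that
* differ between the two instruction sets rather than a single opcode:
*     #if VFMA_IS_FMA3
*             vmovapd %xmm2, %xmm3
*     #endif
*/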
/*
* VEX floating multiply add.
*/
#define VFMA_132SS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmadd,132,ss,src_rm,src_r,dst)
#define VFMA_132SD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmadd,132,sd,src_rm,src_r,dst)
#define VFMA_132PS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmadd,132,ps,src_rm,src_r,dst)
#define VFMA_132PD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmadd,132,pd,src_rm,src_r,dst)
#define VFMA_213SS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmadd,213,ss,src_rm,src_r,dst)
#define VFMA_213SD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmadd,213,sd,src_rm,src_r,dst)
#define VFMA_213PS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmadd,213,ps,src_rm,src_r,dst)
#define VFMA_213PD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmadd,213,pd,src_rm,src_r,dst)
#define VFMA_231SS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmadd,231,ss,src_rm,src_r,dst)
#define VFMA_231SD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmadd,231,sd,src_rm,src_r,dst)
#define VFMA_231PS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmadd,231,ps,src_rm,src_r,dst)
#define VFMA_231PD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmadd,231,pd,src_rm,src_r,dst)
/*
* VEX floating negative multiply add.
*/
#define VFNMA_132SS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmadd,132,ss,src_rm,src_r,dst)
#define VFNMA_132SD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmadd,132,sd,src_rm,src_r,dst)
#define VFNMA_132PS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmadd,132,ps,src_rm,src_r,dst)
#define VFNMA_132PD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmadd,132,pd,src_rm,src_r,dst)
#define VFNMA_213SS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmadd,213,ss,src_rm,src_r,dst)
#define VFNMA_213SD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmadd,213,sd,src_rm,src_r,dst)
#define VFNMA_213PS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmadd,213,ps,src_rm,src_r,dst)
#define VFNMA_213PD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmadd,213,pd,src_rm,src_r,dst)
#define VFNMA_231SS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmadd,231,ss,src_rm,src_r,dst)
#define VFNMA_231SD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmadd,231,sd,src_rm,src_r,dst)
#define VFNMA_231PS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmadd,231,ps,src_rm,src_r,dst)
#define VFNMA_231PD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmadd,231,pd,src_rm,src_r,dst)
/*
* VEX floating multiply subtract.
*/
#define VFMS_132SS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmsub,132,ss,src_rm,src_r,dst)
#define VFMS_132SD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmsub,132,sd,src_rm,src_r,dst)
#define VFMS_132PS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmsub,132,ps,src_rm,src_r,dst)
#define VFMS_132PD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmsub,132,pd,src_rm,src_r,dst)
#define VFMS_213SS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmsub,213,ss,src_rm,src_r,dst)
#define VFMS_213SD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmsub,213,sd,src_rm,src_r,dst)
#define VFMS_213PS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmsub,213,ps,src_rm,src_r,dst)
#define VFMS_213PD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmsub,213,pd,src_rm,src_r,dst)
#define VFMS_231SS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmsub,231,ss,src_rm,src_r,dst)
#define VFMS_231SD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmsub,231,sd,src_rm,src_r,dst)
#define VFMS_231PS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmsub,231,ps,src_rm,src_r,dst)
#define VFMS_231PD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfmsub,231,pd,src_rm,src_r,dst)
/*
* VEX floating negative multiply subtract.
*/
#define VFNMS_132SS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmsub,132,ss,src_rm,src_r,dst)
#define VFNMS_132SD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmsub,132,sd,src_rm,src_r,dst)
#define VFNMS_132PS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmsub,132,ps,src_rm,src_r,dst)
#define VFNMS_132PD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmsub,132,pd,src_rm,src_r,dst)
#define VFNMS_213SS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmsub,213,ss,src_rm,src_r,dst)
#define VFNMS_213SD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmsub,213,sd,src_rm,src_r,dst)
#define VFNMS_213PS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmsub,213,ps,src_rm,src_r,dst)
#define VFNMS_213PD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmsub,213,pd,src_rm,src_r,dst)
#define VFNMS_231SS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmsub,231,ss,src_rm,src_r,dst)
#define VFNMS_231SD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmsub,231,sd,src_rm,src_r,dst)
#define VFNMS_231PS(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmsub,231,ps,src_rm,src_r,dst)
#define VFNMS_231PD(src_rm,src_r,dst) \
        VFMA_FMA3_OR_FMA4(vfnmsub,231,pd,src_rm,src_r,dst)
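/*
* Usage sketch (illustrative): one Horner step of a polynomial kernel
* written once and assembled as either FMA3 or FMA4 depending on how
* VFMA_IS_FMA3_OR_FMA4 is defined.
*/
#if 0
        /* %xmm2 = %xmm1 * %xmm2 + %xmm0 */
        VFMA_213SD(%xmm0, %xmm1, %xmm2)
#endif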