-
Notifications
You must be signed in to change notification settings - Fork 10.5k
/
Copy pathFloat16Support.cpp
197 lines (177 loc) · 7.65 KB
/
Float16Support.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
//===------------- Float16Support.cpp - Swift Float16 Support -------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// Implementations of:
//
// __gnu_h2f_ieee
// __gnu_f2h_ieee
// __truncdfhf2
// __extendhfxf2
// __extendhfsf2
// __truncsfhf2
// __aeabi_d2h (ARM EABI only)
//
// On Darwin platforms, these are provided by the host compiler-rt, but we
// can't depend on that everywhere, so we have to provide them in the Swift
// runtime. Calls to these symbols are automatically generated by LLVM when
// operating on Float16, so they are used *even though they appear to have
// no call sites anywhere in Swift*.
//
// These may require different naming or mangling on other targets; what is
// set up here is correct for Linux/x86.
//
//===----------------------------------------------------------------------===//
// Android NDK releases before r21 do not provide `__aeabi_d2h` in the
// compiler runtime, so we provide shims in that case.
#if (defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && defined(__ARM_EABI__)) || \
((defined(__i386__) || defined(__i686__) || defined(__x86_64__)) && !defined(__APPLE__))
#include "swift/shims/Visibility.h"
// Returns the raw 32-bit pattern that represents the float `f`.
// The bytes are copied rather than reinterpreted through a pointer cast.
static unsigned toEncoding(float f) {
  static_assert(sizeof(unsigned) == sizeof(float),
                "float and int must have the same size");
  unsigned bits;
  __builtin_memcpy(&bits, &f, sizeof bits);
  return bits;
}
// Returns the float whose 32-bit pattern is `e` (inverse of the float
// overload of toEncoding).
static float fromEncoding(unsigned int e) {
  static_assert(sizeof(float) == sizeof(unsigned int),
                "float and int must have the same size");
  float value;
  __builtin_memcpy(&value, &e, sizeof value);
  return value;
}
// Returns the raw 16-bit pattern that represents the _Float16 `f`.
static unsigned short toEncoding(_Float16 f) {
  static_assert(sizeof(unsigned short) == sizeof(_Float16),
                "_Float16 and short must have the same size");
  unsigned short bits;
  __builtin_memcpy(&bits, &f, sizeof bits);
  return bits;
}
// Returns the _Float16 whose 16-bit pattern is `s` (inverse of the _Float16
// overload of toEncoding).
static _Float16 fromEncoding(unsigned short s) {
  static_assert(sizeof(unsigned short) == sizeof(_Float16),
                "_Float16 and short must have the same size");
  _Float16 value;
  __builtin_memcpy(&value, &s, sizeof value);
  return value;
}
#if defined(__x86_64__) && defined(__F16C__)
// If we're compiling the runtime for a target that has the conversion
// instruction, we might as well just use those. In theory, we'd also be
// compiling Swift for that target and not need these builtins at all,
// but who knows what could go wrong, and they're tiny functions.
# include <immintrin.h>
// Hardware (F16C) conversion from a half-precision bit pattern to float.
//
// The parameter is `unsigned short`, matching the software implementation of
// the same symbol and the values produced by toEncoding(_Float16). Taking it
// unsigned also means the 16 bits are zero-extended into the vector, so the
// unused upper lanes hold zero instead of a smeared sign bit.
SWIFT_RUNTIME_EXPORT float __gnu_h2f_ieee(unsigned short h) {
  // Only the lowest 16-bit lane carries the input; the lowest float lane of
  // the converted vector is the result.
  return _mm_cvtss_f32(_mm_cvtph_ps(_mm_set_epi64x(0, h)));
}
// Hardware (F16C) conversion from float to a half-precision bit pattern,
// rounded according to the current rounding direction.
//
// The return type is `unsigned short`, matching the software implementation
// of the same symbol. Returning a signed `short` made calls of the form
// fromEncoding(__gnu_f2h_ieee(f)) ambiguous, because `short` converts equally
// well to both the `unsigned int` and `unsigned short` overloads.
SWIFT_RUNTIME_EXPORT unsigned short __gnu_f2h_ieee(float f) {
  // The converted half lives in the low 16 bits of the low 32-bit lane.
  return (unsigned short)_mm_cvtsi128_si32(
    _mm_cvtps_ph(_mm_set_ss(f), _MM_FROUND_CUR_DIRECTION)
  );
}
#else
// Software conversion from a half-precision bit pattern to float.
//
// Input in di, result in xmm0. We can get that calling convention in C++
// by taking an int16 arg instead of Float16, which we don't have (or else
// we wouldn't need this function).
SWIFT_RUNTIME_EXPORT float __gnu_h2f_ieee(unsigned short h) {
  // Split the input into its sign (moved up to bit 31, where float keeps it)
  // and the remaining 15 exponent+significand bits. Everything stays
  // unsigned, so no negative value is ever shifted.
  const unsigned sign = (unsigned)(h & 0x8000) << 16;
  const unsigned magnitude = h & 0x7fff;

  // We need to have two cases; subnormals and zeros, and everything else.
  // We are in the first case if the exponent field (bits 14:10) is zero:
  if ((h & 0x7c00) == 0) {
    // Reinterpret the significand bits as a float subnormal (or zero) with
    // the appropriate sign, then multiply by the appropriate scale factor
    // to produce the f32 result.
    return 0x1.0p125f * fromEncoding(sign | magnitude);
  }
  // We have either a normal number or an infinity or NaN. All of these
  // can be handled by shifting the significand into the correct position,
  // extending the exponent (the three bits set by 0x70000000), and then
  // multiplying by the correct scale.
  return 0x1.0p-112f * fromEncoding(sign | (magnitude << 13) | 0x70000000U);
}
// Software conversion from float to a half-precision bit pattern.
//
// Input in xmm0, result in di. We can get that calling convention in C++
// by returning int16 instead of Float16, which we don't have (or else
// we wouldn't need this function).
SWIFT_RUNTIME_EXPORT unsigned short __gnu_f2h_ieee(float f) {
// Sign bit of the input, still in float position (bit 31).
unsigned signbit = toEncoding(f) & 0x80000000U;
// Construct a "magic" rounding constant for f; this is a value that
// we will add and subtract from f to force rounding to occur in the
// correct position for half-precision. Half has 10 significand bits,
// float has 23, so we need to add 2**(e+13) to get the desired rounding.
float magic;
// Exponent field of the input, still in float position (bits 30:23).
unsigned exponent = toEncoding(f) & 0x7f800000;
// Subnormals all round in the same place as the minimum normal binade,
// so treat anything below 0x1.0p-14 as 0x1.0p-14 (encoding 0x38800000).
if (exponent < 0x38800000) exponent = 0x38800000;
// In the overflow, inf, and NaN cases, magic doesn't contribute, so we
// just use a zero (carrying f's sign) whenever the exponent field is above
// that of 0x1.0p15 (encoding 0x47000000), i.e. for magnitudes of 0x1.0p16
// or more.
if (exponent > 0x47000000) magic = fromEncoding(signbit);
// Otherwise magic is 2**(e+13) with f's sign: 0x06800000 is 13 in the
// exponent field. Note that + binds tighter than |, so the addition happens
// before the sign bit is merged in.
else magic = fromEncoding(signbit | exponent + 0x06800000);
// Map anything with an exponent larger than 15 to infinity; this will
// avoid special-casing overflow later on.
f = 0x1.0p112f*f;
// Undo the scaling, then add and subtract magic so that the float addition
// itself performs the rounding at half-precision granularity.
f = 0x1.0p-112f*f + magic;
f -= magic;
// We've now rounded in the correct place. One more scaling and we have
// all the bits we need (this multiply does not change anything for
// normal results, but denormalizes tiny results exactly as needed).
f *= 0x1.0p-112f;
// Drop the 13 float significand bits that half does not have; what remains
// in the low 15 bits is the half exponent and significand.
short magnitude = toEncoding(f) >> 13 & 0x7fff;
// Move the sign from bit 31 down to bit 15 and merge. The signed shift
// (assuming the usual arithmetic right shift) also sets the bits above 15,
// but those are discarded by the conversion to unsigned short.
return (int)signbit >> 16 | magnitude;
}
#endif
// Convert a double to Float16 with a single, correct rounding.
//
// Takes the double by value and returns a _Float16 built from the bit
// pattern produced by __gnu_f2h_ieee.
//
// Note that F16C doesn't provide this operation, so we still need a software
// implementation on those cores.
SWIFT_RUNTIME_EXPORT _Float16 __truncdfhf2(double d) {
  // You can't just do (half)(float)d, because that makes the result
  // susceptible to double-rounding. Instead we need to make the first
  // rounding use round-to-odd, but that doesn't exist on x86, so we have
  // to fake it.
  float f = (float)d;
  unsigned bits = toEncoding(f);
  const float magnitude = __builtin_fabsf(f);

  // Double-rounding can only occur if the result of rounding to float is
  // an exact-halfway case for the subsequent rounding to float16, i.e. the
  // 13 significand bits that float16 drops are exactly 1000000000000b.
  // Values that will become a subnormal float16 round in a different
  // position, so they always take the careful path.
  const bool exactHalfway = (bits & 0x1fff) == 0x1000;
  if (exactHalfway || magnitude < 0x1.0p-14f) {
    // We might be in a double-rounding case, so simulate sticky rounding
    // by comparing the float result against d and adjusting as needed.
    const double rounded = magnitude;
    const double exact = __builtin_fabs(d);
    if (rounded > exact) {
      // Rounded away from zero: step down to the odd neighbour if the
      // encoding is currently even.
      bits -= ~bits & 1;
    } else if (rounded < exact) {
      // Rounded toward zero: force the low bit on.
      bits |= 1;
    }
    f = fromEncoding(bits);
  }
  // Bind the result to unsigned short so the _Float16 overload of
  // fromEncoding is selected whichever variant of __gnu_f2h_ieee was
  // compiled (the F16C variant is declared as returning short).
  const unsigned short half = __gnu_f2h_ieee(f);
  return fromEncoding(half);
}
// Widen a Float16 to long double.
//
// Float32 can hold every Float16 value, and the compiler already knows how
// to widen Float32 to long double (which, at least on x86, doesn't involve
// function calls), so we convert to Float32 ourselves and leave the second
// step to the compiler.
//
// Going through two conversions cannot introduce a rounding problem here,
// because both steps only extend.
SWIFT_RUNTIME_EXPORT long double __extendhfxf2(_Float16 h) {
  const float widened = __gnu_h2f_ieee(toEncoding(h));
  return widened;
}
// Alternative entry point for __gnu_h2f_ieee that takes the value as a
// _Float16 rather than as its bit pattern.
SWIFT_RUNTIME_EXPORT float __extendhfsf2(_Float16 h) {
  const unsigned short bits = toEncoding(h);
  return __gnu_h2f_ieee(bits);
}
// Alternative entry point for __gnu_f2h_ieee that returns the result as a
// _Float16 rather than as its bit pattern.
SWIFT_RUNTIME_EXPORT _Float16 __truncsfhf2(float f) {
  // Bind the result to unsigned short so the _Float16 overload of
  // fromEncoding is selected whichever variant of __gnu_f2h_ieee was
  // compiled (the F16C variant is declared as returning short, which would
  // otherwise make the overloaded call ambiguous).
  const unsigned short bits = __gnu_f2h_ieee(f);
  return fromEncoding(bits);
}
#if defined(__ARM_EABI__)
// ARM EABI name for the double -> half conversion. Returns the half-precision
// bit pattern as an integer.
SWIFT_RUNTIME_EXPORT unsigned short __aeabi_d2h(double d) {
  // __truncdfhf2 returns a _Float16 value. Returning it directly would
  // perform a floating-point-to-integer conversion (1.0 -> 1) instead of
  // handing back the encoding (1.0 -> 0x3C00), so extract the bits explicitly.
  return toEncoding(__truncdfhf2(d));
}
#endif
#endif // (Android ARMv7 EABI) || (x86 && !defined(__APPLE__))