Skip to content

Commit 3a39062

Browse files
authored
Merge pull request #12 from xianyi/develop
resync with upstream
2 parents 17609f8 + eaa0be1 commit 3a39062

10 files changed

+1511
-26
lines changed

.travis.yml

+8
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,14 @@ matrix:
173173
env:
174174
- BTYPE="BINARY=32 FC=gfortran-8"
175175

176+
- <<: *test-macos
177+
osx_image: xcode10.1
178+
env:
179+
- COMMON_FLAGS="NUM_THREADS=32"
180+
- CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk"
181+
- CFLAGS="-O2 -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
182+
- BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang"
183+
176184
# whitelist
177185
branches:
178186
only:

c_check

+13
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,19 @@ if ($architecture ne $hostarch) {
260260

261261
$cross = 1 if ($os ne $hostos);
262262

263+
# Rework the cross suffix and architecture when cross-compiling on macOS for ARMv8-based iOS:
264+
# the initial autodetection will have been confused by the command-line arguments to clang
265+
# and the cross-compiler apparently still claims to build for x86_64 in its CC -E output
266+
if (($os eq "Darwin") && ($cross_suffix ne "")) {
267+
my $tmpnam = `xcrun --sdk iphoneos --find clang`;
268+
$cross_suffix = substr($tmpnam, 0, rindex($tmpnam, "/")+1 );
269+
# this should produce something like $cross_suffix="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/";
270+
$cross =1;
271+
# Quote the value: a bareword is only treated as a string while "use strict" is off.
$architecture = "arm64";
272+
}
273+
274+
275+
263276
$openmp = "" if $ENV{USE_OPENMP} != 1;
264277

265278
$linker_L = "";

common_arm64.h

+2
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,9 @@ static inline int blas_quickdivide(blasint x, blasint y){
107107
.text ;
108108
.p2align 2 ;
109109
.global REALNAME ;
110+
#ifndef __APPLE__
110111
.type REALNAME, %function ;
112+
#endif
111113
REALNAME:
112114
.endm
113115

cpuid_x86.c

+12-2
Original file line numberDiff line numberDiff line change
@@ -1379,8 +1379,6 @@ int get_cpuname(void){
13791379
break;
13801380
case 7: // family 6 exmodel 7
13811381
switch (model) {
1382-
case 10: // Goldmont Plus
1383-
return CPUTYPE_NEHALEM;
13841382
case 14: // Ice Lake
13851383
if(support_avx512())
13861384
return CPUTYPE_SKYLAKEX;
@@ -1427,7 +1425,11 @@ int get_cpuname(void){
14271425
case 0x5:
14281426
return CPUTYPE_AMDK6;
14291427
case 0x6:
1428+
#if defined(__x86_64__) || defined(__amd64__)
1429+
return CPUTYPE_BARCELONA;
1430+
#else
14301431
return CPUTYPE_ATHLON;
1432+
#endif
14311433
case 0xf:
14321434
switch (exfamily) {
14331435
case 0:
@@ -1810,7 +1812,11 @@ int get_coretype(void){
18101812
case 4:
18111813
case 5:
18121814
case 6:
1815+
#if defined(__x86_64__) || defined(__amd64__)
1816+
return CORE_CORE2;
1817+
#else
18131818
return CORE_P6;
1819+
#endif
18141820
case 7:
18151821
return CORE_KATMAI;
18161822
case 8:
@@ -2017,7 +2023,11 @@ int get_coretype(void){
20172023

20182024
if (vendor == VENDOR_AMD){
20192025
if (family <= 0x5) return CORE_80486;
2026+
#if defined(__x86_64__) || defined(__amd64__)
2027+
if (family <= 0xe) return CORE_BARCELONA;
2028+
#else
20202029
if (family <= 0xe) return CORE_ATHLON;
2030+
#endif
20212031
if (family == 0xf){
20222032
if ((exfamily == 0) || (exfamily == 2)) return CORE_OPTERON;
20232033
else if (exfamily == 5) return CORE_BOBCAT;

driver/others/dynamic_arm64.c

+7-1
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,10 @@
3737
/*********************************************************************/
3838

3939
#include "common.h"
40+
#if (defined OS_LINUX || defined OS_ANDROID)
4041
#include <asm/hwcap.h>
4142
#include <sys/auxv.h>
43+
#endif
4244

4345
extern gotoblas_t gotoblas_ARMV8;
4446
extern gotoblas_t gotoblas_CORTEXA57;
@@ -105,13 +107,17 @@ static gotoblas_t *force_coretype(char *coretype) {
105107

106108
static gotoblas_t *get_coretype(void) {
107109
int implementer, variant, part, arch, revision, midr_el1;
108-
110+
111+
#if (defined OS_LINUX || defined OS_ANDROID)
109112
if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
110113
char coremsg[128];
111114
snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n");
112115
openblas_warning(1, coremsg);
113116
return NULL;
114117
}
118+
#else
119+
return NULL;
120+
#endif
115121

116122
get_cpu_ftr(MIDR_EL1, midr_el1);
117123
/*

kernel/arm64/znrm2.S

+19-19
Original file line numberDiff line numberDiff line change
@@ -123,69 +123,69 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
123123
#if !defined(DOUBLE)
124124
ldr s4, [X]
125125
fcmp s4, REGZERO
126-
beq KERNEL_S1_NEXT_\@
126+
beq KERNEL_S1_NEXT
127127
fabs s4, s4
128128
fcmp SCALE, s4
129-
bge KERNEL_S1_SCALE_GE_XR_\@
129+
bge KERNEL_S1_SCALE_GE_XR
130130
fdiv s2, SCALE, s4
131131
fmul s2, s2, s2
132132
fmul s3, SSQ, s2
133133
fadd SSQ, REGONE, s3
134134
fmov SCALE, s4
135-
b KERNEL_S1_NEXT_\@
136-
KERNEL_S1_SCALE_GE_XR_\@:
135+
b KERNEL_S1_NEXT
136+
KERNEL_S1_SCALE_GE_XR:
137137
fdiv s2, s4, SCALE
138138
fmla SSQ, s2, v2.s[0]
139-
KERNEL_S1_NEXT_\@:
139+
KERNEL_S1_NEXT:
140140
ldr s5, [X, #4]
141141
fcmp s5, REGZERO
142-
beq KERNEL_S1_END_\@
142+
beq KERNEL_S1_END
143143
fabs s5, s5
144144
fcmp SCALE, s5
145-
bge KERNEL_S1_SCALE_GE_XI_\@
145+
bge KERNEL_S1_SCALE_GE_XI
146146
fdiv s2, SCALE, s5
147147
fmul s2, s2, s2
148148
fmul s3, SSQ, s2
149149
fadd SSQ, REGONE, s3
150150
fmov SCALE, s5
151-
b KERNEL_S1_END_\@
152-
KERNEL_S1_SCALE_GE_XI_\@:
151+
b KERNEL_S1_END
152+
KERNEL_S1_SCALE_GE_XI:
153153
fdiv s2, s5, SCALE
154154
fmla SSQ, s2, v2.s[0]
155155
#else
156156
ldr d4, [X]
157157
fcmp d4, REGZERO
158-
beq KERNEL_S1_NEXT_\@
158+
beq KERNEL_S1_NEXT
159159
fabs d4, d4
160160
fcmp SCALE, d4
161-
bge KERNEL_S1_SCALE_GE_XR_\@
161+
bge KERNEL_S1_SCALE_GE_XR
162162
fdiv d2, SCALE, d4
163163
fmul d2, d2, d2
164164
fmul d3, SSQ, d2
165165
fadd SSQ, REGONE, d3
166166
fmov SCALE, d4
167-
b KERNEL_S1_NEXT_\@
168-
KERNEL_S1_SCALE_GE_XR_\@:
167+
b KERNEL_S1_NEXT
168+
KERNEL_S1_SCALE_GE_XR:
169169
fdiv d2, d4, SCALE
170170
fmla SSQ, d2, v2.d[0]
171-
KERNEL_S1_NEXT_\@:
171+
KERNEL_S1_NEXT:
172172
ldr d5, [X, #8]
173173
fcmp d5, REGZERO
174-
beq KERNEL_S1_END_\@
174+
beq KERNEL_S1_END
175175
fabs d5, d5
176176
fcmp SCALE, d5
177-
bge KERNEL_S1_SCALE_GE_XI_\@
177+
bge KERNEL_S1_SCALE_GE_XI
178178
fdiv d2, SCALE, d5
179179
fmul d2, d2, d2
180180
fmul d3, SSQ, d2
181181
fadd SSQ, REGONE, d3
182182
fmov SCALE, d5
183-
b KERNEL_S1_END_\@
184-
KERNEL_S1_SCALE_GE_XI_\@:
183+
b KERNEL_S1_END
184+
KERNEL_S1_SCALE_GE_XI:
185185
fdiv d2, d5, SCALE
186186
fmla SSQ, d2, v2.d[0]
187187
#endif
188-
KERNEL_S1_END_\@:
188+
KERNEL_S1_END:
189189
add X, X, INC_X
190190
.endm
191191

kernel/x86_64/KERNEL.SKYLAKEX

+1-3
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,8 @@ SGEMMITCOPY = sgemm_tcopy_16_skylakex.c
77
SGEMMONCOPY = sgemm_ncopy_4_skylakex.c
88
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
99

10-
#DGEMMKERNEL = dgemm_kernel_4x8_skylakex.c
10+
DGEMMKERNEL = dgemm_kernel_4x8_skylakex_2.c
1111

12-
#DGEMMINCOPY = dgemm_ncopy_8_skylakex.c
13-
#DGEMMITCOPY = dgemm_tcopy_8_skylakex.c
1412
DGEMMONCOPY = dgemm_ncopy_8_skylakex.c
1513
DGEMMOTCOPY = dgemm_tcopy_8_skylakex.c
1614

0 commit comments

Comments
 (0)