Skip to content

Commit 821ef34

Browse files
Add A64FX to the list of CPUs supported by DYNAMIC_ARCH
1 parent a815594 commit 821ef34

File tree

5 files changed

+65
-2
lines changed

5 files changed

+65
-2
lines changed

Makefile.system

+1
Original file line number | Diff line number | Diff line change
@@ -689,6 +689,7 @@ ifneq ($(NO_SVE), 1)
689689
DYNAMIC_CORE += NEOVERSEV1
690690
DYNAMIC_CORE += NEOVERSEN2
691691
DYNAMIC_CORE += ARMV8SVE
692+
DYNAMIC_CORE += A64FX
692693
endif
693694
DYNAMIC_CORE += THUNDERX
694695
DYNAMIC_CORE += THUNDERX2T99

cmake/arch.cmake

+1-1
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ if (DYNAMIC_ARCH)
4646
if (ARM64)
4747
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
4848
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 9.99)
49-
set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE)
49+
set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE A64FX)
5050
endif ()
5151
if (DYNAMIC_LIST)
5252
set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST})

cmake/prebuild.cmake

+31
Original file line number | Diff line number | Diff line change
@@ -1218,6 +1218,37 @@ endif ()
12181218
set(ZGEMM_UNROLL_M 4)
12191219
set(ZGEMM_UNROLL_N 4)
12201220
set(SYMV_P 16)
1221+
elseif ("${TCORE}" STREQUAL "A64FX")
1222+
file(APPEND ${TARGET_CONF_TEMP}
1223+
"#define L1_CODE_SIZE\t65536\n"
1224+
"#define L1_CODE_LINESIZE\t256\n"
1225+
"#define L1_CODE_ASSOCIATIVE\t8\n"
1226+
"#define L1_DATA_SIZE\t32768\n"
1227+
"#define L1_DATA_LINESIZE\t256\n"
1228+
"#define L1_DATA_ASSOCIATIVE\t8\n"
1229+
"#define L2_SIZE\t8388608\n\n"
1230+
"#define L2_LINESIZE\t256\n"
1231+
"#define L2_ASSOCIATIVE\t8\n"
1232+
"#define L3_SIZE\t0\n\n"
1233+
"#define L3_LINESIZE\t0\n\n"
1234+
"#define L3_ASSOCIATIVE\t0\n\n"
1235+
"#define DTB_DEFAULT_ENTRIES\t64\n"
1236+
"#define DTB_SIZE\t4096\n"
1237+
"#define HAVE_VFPV4\n"
1238+
"#define HAVE_VFPV3\n"
1239+
"#define HAVE_VFP\n"
1240+
"#define HAVE_NEON\n"
1241+
"#define HAVE_SVE\n"
1242+
"#define ARMV8\n")
1243+
set(SGEMM_UNROLL_M 4)
1244+
set(SGEMM_UNROLL_N 8)
1245+
set(DGEMM_UNROLL_M 2)
1246+
set(DGEMM_UNROLL_N 8)
1247+
set(CGEMM_UNROLL_M 2)
1248+
set(CGEMM_UNROLL_N 4)
1249+
set(ZGEMM_UNROLL_M 2)
1250+
set(ZGEMM_UNROLL_N 4)
1251+
set(SYMV_P 16)
12211252
elseif ("${TCORE}" STREQUAL "P5600")
12221253
file(APPEND ${TARGET_CONF_TEMP}
12231254
"#define L2_SIZE 1048576\n"

cmake/system.cmake

+12
Original file line number | Diff line number | Diff line change
@@ -310,6 +310,18 @@ if (${TARGET} STREQUAL NEOVERSEV1)
310310
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve")
311311
endif()
312312
endif()
313+
if (${TARGET} STREQUAL A64FX)
314+
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
315+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve-intrinsics -march=armv8.2-a+sve -mtune=a64fx")
316+
else ()
317+
# Query the C compiler for its version via -dumpversion. The trailing newline is stripped so GCC_VERSION holds a clean version string for the comparison and any diagnostic that follows.
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE)
318+
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
319+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve -mtune=a64fx")
320+
else ()
321+
# Abort configuration when the compiler fails the version check for A64FX. The message reports the compiler path and the version string captured from -dumpversion.
message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support A64FX.")
322+
endif()
323+
endif()
324+
endif()
313325

314326
endif()
315327

driver/others/dynamic_arm64.c

+20-1
Original file line number | Diff line number | Diff line change
@@ -120,6 +120,11 @@ extern gotoblas_t gotoblas_CORTEXA55;
120120
#else
121121
#define gotoblas_CORTEXA55 gotoblas_ARMV8
122122
#endif
123+
#ifdef DYN_A64FX
124+
extern gotoblas_t gotoblas_A64FX;
125+
#else
126+
#define gotoblas_A64FX gotoblas_ARMV8
127+
#endif
123128
#else
124129
extern gotoblas_t gotoblas_CORTEXA53;
125130
#define gotoblas_CORTEXA55 gotoblas_CORTEXA53
@@ -136,10 +141,12 @@ extern gotoblas_t gotoblas_NEOVERSEN1;
136141
extern gotoblas_t gotoblas_NEOVERSEV1;
137142
extern gotoblas_t gotoblas_NEOVERSEN2;
138143
extern gotoblas_t gotoblas_ARMV8SVE;
144+
extern gotoblas_t gotoblas_A64FX;
139145
#else
140146
#define gotoblas_NEOVERSEV1 gotoblas_ARMV8
141147
#define gotoblas_NEOVERSEN2 gotoblas_ARMV8
142148
#define gotoblas_ARMV8SVE gotoblas_ARMV8
149+
#define gotoblas_A64FX gotoblas_ARMV8
143150
#endif
144151
extern gotoblas_t gotoblas_THUNDERX3T110;
145152
#endif
@@ -149,7 +156,7 @@ extern void openblas_warning(int verbose, const char * msg);
149156
#define FALLBACK_VERBOSE 1
150157
#define NEOVERSEN1_FALLBACK "OpenBLAS : Your OS does not support SVE instructions. OpenBLAS is using Neoverse N1 kernels as a fallback, which may give poorer performance.\n"
151158

152-
#define NUM_CORETYPES 17
159+
#define NUM_CORETYPES 18
153160

154161
/*
155162
* In case asm/hwcap.h is outdated on the build system, make sure
@@ -184,6 +191,7 @@ static char *corename[] = {
184191
"thunderx3t110",
185192
"cortexa55",
186193
"armv8sve",
194+
"a64fx",
187195
"unknown"
188196
};
189197

@@ -205,6 +213,7 @@ char *gotoblas_corename(void) {
205213
if (gotoblas == &gotoblas_THUNDERX3T110) return corename[14];
206214
if (gotoblas == &gotoblas_CORTEXA55) return corename[15];
207215
if (gotoblas == &gotoblas_ARMV8SVE) return corename[16];
216+
if (gotoblas == &gotoblas_A64FX) return corename[17];
208217
return corename[NUM_CORETYPES];
209218
}
210219

@@ -241,6 +250,7 @@ static gotoblas_t *force_coretype(char *coretype) {
241250
case 14: return (&gotoblas_THUNDERX3T110);
242251
case 15: return (&gotoblas_CORTEXA55);
243252
case 16: return (&gotoblas_ARMV8SVE);
253+
case 17: return (&gotoblas_A64FX);
244254
}
245255
snprintf(message, 128, "Core not found: %s\n", coretype);
246256
openblas_warning(1, message);
@@ -346,6 +356,15 @@ static gotoblas_t *get_coretype(void) {
346356
return &gotoblas_THUNDERX3T110;
347357
}
348358
break;
359+
case 0x46: // Fujitsu
360+
switch (part)
361+
{
362+
#ifndef NO_SVE
363+
case 0x001: // A64FX
364+
return &gotoblas_A64FX;
365+
#endif
366+
}
367+
break;
349368
case 0x48: // HiSilicon
350369
switch (part)
351370
{

0 commit comments

Comments
 (0)