Skip to content

Commit b4495a8

Browse files
authored
Merge branch 'develop' into arm64_cmake_small_matrix_opt
2 parents 7087b0a + 68eefe6 commit b4495a8

26 files changed

+602
-146
lines changed

.github/workflows/loongarch64.yml

+9
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,15 @@ jobs:
2323
- target: LOONGSON2K1000
2424
triple: loongarch64-unknown-linux-gnu
2525
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
26+
- target: LA64_GENERIC
27+
triple: loongarch64-unknown-linux-gnu
28+
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA64_GENERIC
29+
- target: LA464
30+
triple: loongarch64-unknown-linux-gnu
31+
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA464
32+
- target: LA264
33+
triple: loongarch64-unknown-linux-gnu
34+
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA264
2635
- target: DYNAMIC_ARCH
2736
triple: loongarch64-unknown-linux-gnu
2837
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC

.github/workflows/loongarch64_clang.yml

+6
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@ jobs:
2020
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON3R5
2121
- target: LOONGSON2K1000
2222
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
23+
- target: LA64_GENERIC
24+
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA64_GENERIC
25+
- target: LA464
26+
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA464
27+
- target: LA264
28+
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA264
2329
- target: DYNAMIC_ARCH
2430
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC
2531

Makefile.install

+4
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ endif
1414
ifeq ($(INTERFACE64),1)
1515
USE_64BITINT=1
1616
endif
17+
ifeq ($(USE_OPENMP),1)
18+
FOMP_OPT:= -fopenmp
19+
endif
1720

1821
PREFIX ?= /opt/OpenBLAS
1922

@@ -178,6 +181,7 @@ endif
178181
@echo 'libnamesuffix='$(LIBNAMESUFFIX) >> "$(PKGFILE)"
179182
@echo 'libsuffix='$(SYMBOLSUFFIX) >> "$(PKGFILE)"
180183
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(PKGFILE)"
184+
@echo 'omp_opt='$(FOMP_OPT) >> "$(PKGFILE)"
181185
@echo 'openblas_config= USE_64BITINT='$(INTERFACE64) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(TARGET) 'MAX_THREADS='$(NUM_THREADS)>> "$(PKGFILE)"
182186
@echo 'version='$(VERSION) >> "$(PKGFILE)"
183187
@echo 'extralib='$(PKG_EXTRALIB) >> "$(PKGFILE)"

Makefile.system

+3-3
Original file line numberDiff line numberDiff line change
@@ -727,7 +727,7 @@ endif
727727
endif
728728

729729
ifeq ($(ARCH), loongarch64)
730-
DYNAMIC_CORE = LOONGSON3R5 LOONGSON2K1000 LOONGSONGENERIC
730+
DYNAMIC_CORE = LA64_GENERIC LA264 LA464
731731
endif
732732

733733
ifeq ($(ARCH), riscv64)
@@ -1720,8 +1720,8 @@ LAPACK_FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx
17201720
override FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
17211721
endif
17221722
ifeq ($(F_COMPILER),FLANGNEW)
1723-
LAPACK_FFLAGS := $(filter-out -m32 -m64 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
1724-
override FFLAGS := $(filter-out -m32 -m64 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
1723+
LAPACK_FFLAGS := $(filter-out -m32 -m64 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 -mtune=% -mabi=% ,$(FFLAGS))
1724+
override FFLAGS := $(filter-out -m32 -m64 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 -mtune=% -mabi=% ,$(FFLAGS))
17251725
endif
17261726

17271727
LAPACK_CFLAGS = $(CFLAGS)

TargetList.txt

+9-1
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,17 @@ x280
126126
RISCV64_ZVL256B
127127

128128
11.LOONGARCH64:
129+
// LOONGSONGENERIC/LOONGSON2K1000/LOONGSON3R5 are legacy names,
130+
// and it is recommended to use the more standardized naming conventions
131+
// LA64_GENERIC/LA264/LA464. You can still specify TARGET as
132+
// LOONGSONGENERIC/LOONGSON2K1000/LOONGSON3R5 during compilation or runtime,
133+
// and they will be internally relocated to LA64_GENERIC/LA264/LA464.
129134
LOONGSONGENERIC
130-
LOONGSON3R5
131135
LOONGSON2K1000
136+
LOONGSON3R5
137+
LA64_GENERIC
138+
LA264
139+
LA464
132140

133141
12. Elbrus E2000:
134142
E2K

cblas.h

+4-4
Original file line numberDiff line numberDiff line change
@@ -407,13 +407,13 @@ void cblas_cimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum
407407
void cblas_zimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double* calpha, double* a,
408408
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
409409

410-
void cblas_sgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float cbeta,
410+
void cblas_sgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float cbeta,
411411
float *c, OPENBLAS_CONST blasint cldc);
412-
void cblas_dgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double cbeta,
412+
void cblas_dgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double cbeta,
413413
double *c, OPENBLAS_CONST blasint cldc);
414-
void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float *calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float *cbeta,
414+
void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float *calpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float *cbeta,
415415
float *c, OPENBLAS_CONST blasint cldc);
416-
void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta,
416+
void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta,
417417
double *c, OPENBLAS_CONST blasint cldc);
418418

419419
void cblas_sgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array,

cmake/arch.cmake

+4
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,10 @@ if (DYNAMIC_ARCH)
9494
endif ()
9595
endif ()
9696

97+
if (LOONGARCH64)
98+
set(DYNAMIC_CORE LOONGSONGENERIC LOONGSON2K1000 LOONGSON3R5)
99+
endif ()
100+
97101
if (EXISTS ${PROJECT_SOURCE_DIR}/config_kernel.h)
98102
message (FATAL_ERROR "Your build directory contains a file config_kernel.h, probably from a previous compilation with make. This will conflict with the cmake compilation and cause strange compiler errors - please remove the file before trying again")
99103
endif ()

cmake/fc.cmake

+15-11
Original file line numberDiff line numberDiff line change
@@ -61,21 +61,25 @@ if (${F_COMPILER} STREQUAL "GFORTRAN" OR ${F_COMPILER} STREQUAL "F95" OR CMAKE_F
6161
endif ()
6262
if (LOONGARCH64)
6363
if (BINARY64)
64-
CHECK_C_COMPILER_FLAG("-mabi=lp64d" COMPILER_SUPPORT_LP64D_ABI)
65-
if(COMPILER_SUPPORT_LP64D_ABI)
66-
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64d")
67-
else()
68-
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64")
69-
endif ()
64+
if (NOT CMAKE_Fortran_COMPILER_ID MATCHES "LLVMFlang.*")
65+
CHECK_C_COMPILER_FLAG("-mabi=lp64d" COMPILER_SUPPORT_LP64D_ABI)
66+
if(COMPILER_SUPPORT_LP64D_ABI)
67+
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64d")
68+
else()
69+
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64")
70+
endif ()
71+
endif ()
7072
if (INTERFACE64)
7173
set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
7274
endif ()
7375
else ()
74-
CHECK_C_COMPILER_FLAG("-mabi=ilp32d" COMPILER_SUPPORT_ILP32D_ABI)
75-
if(COMPILER_SUPPORT_ILP32D_ABI)
76-
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=ilp32d")
77-
else()
78-
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp32")
76+
if (NOT CMAKE_Fortran_COMPILER_ID MATCHES "LLVMFlang.*")
77+
CHECK_C_COMPILER_FLAG("-mabi=ilp32d" COMPILER_SUPPORT_ILP32D_ABI)
78+
if(COMPILER_SUPPORT_ILP32D_ABI)
79+
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=ilp32d")
80+
else()
81+
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp32")
82+
endif ()
7983
endif ()
8084
endif ()
8185
endif ()

cmake/openblas.pc.in

+2-2
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,5 @@ Name: OpenBLAS
99
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
1010
Version: @OpenBLAS_VERSION@
1111
URL: https://github.com/OpenMathLib/OpenBLAS
12-
Libs: @OpenMP_C_FLAGS@ -L${libdir} -l${libnameprefix}openblas${libnamesuffix}${libsuffix}
13-
Cflags: -I${includedir}
12+
Libs: -L${libdir} -l${libnameprefix}openblas${libnamesuffix}${libsuffix}
13+
Cflags: -I${includedir} @OpenMP_C_FLAGS@

cmake/prebuild.cmake

+48
Original file line numberDiff line numberDiff line change
@@ -1349,6 +1349,54 @@ endif ()
13491349
"#define DTB_DEFAULT_ENTRIES 128\n"
13501350
"#define DTB_SIZE 4096\n"
13511351
"#define L2_ASSOCIATIVE 4\n")
1352+
elseif ("${TCORE}" STREQUAL "LOONGSONGENERIC")
1353+
file(APPEND ${TARGET_CONF_TEMP}
1354+
"#define DTB_DEFAULT_ENTRIES 64\n")
1355+
set(SGEMM_UNROLL_M 2)
1356+
set(SGEMM_UNROLL_N 8)
1357+
set(DGEMM_UNROLL_M 2)
1358+
set(DGEMM_UNROLL_N 8)
1359+
set(CGEMM_UNROLL_M 1)
1360+
set(CGEMM_UNROLL_N 4)
1361+
set(ZGEMM_UNROLL_M 1)
1362+
set(ZGEMM_UNROLL_N 4)
1363+
set(CGEMM3M_UNROLL_M 2)
1364+
set(CGEMM3M_UNROLL_N 8)
1365+
set(ZGEMM3M_UNROLL_M 2)
1366+
set(ZGEMM3M_UNROLL_N 8)
1367+
elseif ("${TCORE}" STREQUAL "LOONGSON2K1000")
1368+
file(APPEND ${TARGET_CONF_TEMP}
1369+
"#define DTB_DEFAULT_ENTRIES 64\n")
1370+
set(HAVE_LSX 1)
1371+
set(SGEMM_UNROLL_M 2)
1372+
set(SGEMM_UNROLL_N 8)
1373+
set(DGEMM_UNROLL_M 8)
1374+
set(DGEMM_UNROLL_N 4)
1375+
set(CGEMM_UNROLL_M 8)
1376+
set(CGEMM_UNROLL_N 4)
1377+
set(ZGEMM_UNROLL_M 4)
1378+
set(ZGEMM_UNROLL_N 4)
1379+
set(CGEMM3M_UNROLL_M 2)
1380+
set(CGEMM3M_UNROLL_N 8)
1381+
set(ZGEMM3M_UNROLL_M 8)
1382+
set(ZGEMM3M_UNROLL_N 4)
1383+
elseif ("${TCORE}" STREQUAL "LOONGSON3R5")
1384+
file(APPEND ${TARGET_CONF_TEMP}
1385+
"#define DTB_DEFAULT_ENTRIES 64\n")
1386+
set(HAVE_LASX 1)
1387+
set(HAVE_LSX 1)
1388+
set(SGEMM_UNROLL_M 16)
1389+
set(SGEMM_UNROLL_N 8)
1390+
set(DGEMM_UNROLL_M 16)
1391+
set(DGEMM_UNROLL_N 6)
1392+
set(CGEMM_UNROLL_M 16)
1393+
set(CGEMM_UNROLL_N 4)
1394+
set(ZGEMM_UNROLL_M 8)
1395+
set(ZGEMM_UNROLL_N 4)
1396+
set(CGEMM3M_UNROLL_M 16)
1397+
set(CGEMM3M_UNROLL_N 8)
1398+
set(ZGEMM3M_UNROLL_M 16)
1399+
set(ZGEMM3M_UNROLL_N 6)
13521400
endif()
13531401
set(SBGEMM_UNROLL_M 8)
13541402
set(SBGEMM_UNROLL_N 4)

cmake/system.cmake

+2-2
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,7 @@ if (NEED_PIC)
388388
endif()
389389
endif ()
390390

391-
if (X86_64 OR ${CORE} STREQUAL POWER10 OR ARM64)
391+
if (X86_64 OR ${CORE} STREQUAL POWER10 OR ARM64 OR LOONGARCH64)
392392
set(SMALL_MATRIX_OPT TRUE)
393393
endif ()
394394
if (ARM64)
@@ -403,7 +403,7 @@ if (SMALL_MATRIX_OPT)
403403
endif ()
404404

405405
if (DYNAMIC_ARCH)
406-
if (X86 OR X86_64 OR ARM64 OR POWER OR RISCV64)
406+
if (X86 OR X86_64 OR ARM64 OR POWER OR RISCV64 OR LOONGARCH64)
407407
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
408408
if (DYNAMIC_OLDER)
409409
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_OLDER")

cmake/system_check.cmake

+2
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ elseif(ARM)
104104
set(ARCH "arm")
105105
elseif(ARM64)
106106
set(ARCH "arm64")
107+
elseif(LOONGARCH64)
108+
set(ARCH "loongarch64")
107109
else()
108110
set(ARCH ${CMAKE_SYSTEM_PROCESSOR} CACHE STRING "Target Architecture")
109111
endif ()

common_loongarch64.h

+4
Original file line numberDiff line numberDiff line change
@@ -281,9 +281,13 @@ REALNAME: ;\
281281
#define GNUSTACK
282282
#endif /* defined(__linux__) && defined(__ELF__) */
283283

284+
#ifdef __clang__
285+
#define EPILOGUE .end
286+
#else
284287
#define EPILOGUE \
285288
.end REALNAME ;\
286289
GNUSTACK
290+
#endif
287291

288292
#define PROFCODE
289293

0 commit comments

Comments
 (0)