Skip to content

Commit 22bb50f

Browse files
author
Rajalakshmi Srinivasaraghavan
committed
cmake fixes
1 parent 67cc4b9 commit 22bb50f

File tree

7 files changed

+287
-6
lines changed

7 files changed

+287
-6
lines changed

CMakeLists.txt

+6
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,7 @@ endif ()
8989
# set which float types we want to build for
9090
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
9191
# if none are defined, build for all
92+
set(BUILD_HALF true)
9293
set(BUILD_SINGLE true)
9394
set(BUILD_DOUBLE true)
9495
set(BUILD_COMPLEX true)
@@ -120,6 +121,11 @@ if (BUILD_COMPLEX16)
120121
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
121122
endif ()
122123

124+
if (BUILD_SINGLE OR BUILD_HALF)
125+
message(STATUS "Building Half Precision")
126+
list(APPEND FLOAT_TYPES "HALF") # defines nothing
127+
endif ()
128+
123129
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
124130
message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
125131
endif ()

cmake/kernel.cmake

+37-2
Original file line number | Diff line number | Diff line change
@@ -113,11 +113,29 @@ macro(SetDefaultL1)
113113
set(ZSUMKERNEL zsum.S)
114114
set(QSUMKERNEL sum.S)
115115
set(XSUMKERNEL zsum.S)
116+
set(SHAMINKERNEL ../arm/amin.c)
117+
set(SHAMAXKERNEL amax.S)
118+
set(SHMAXKERNEL ../arm/max.c)
119+
set(SHMINKERNEL ../arm/min.c)
120+
set(ISHAMAXKERNEL iamax.S)
121+
set(ISHAMINKERNEL ../arm/iamin.c)
122+
set(ISHMAXKERNEL ../arm/imax.c)
123+
set(ISHMINKERNEL ../arm/imin.c)
124+
set(SHASUMKERNEL asum.S)
125+
set(SHAXPYKERNEL axpy.S)
126+
set(SHAXPBYKERNEL ../arm/axpby.c)
127+
set(SHCOPYKERNEL copy.S)
128+
set(SHDOTKERNEL dot.S)
129+
set(SHROTKERNEL rot.S)
130+
set(SHSCALKERNEL scal.S)
131+
set(SHNRM2KERNEL nrm2.S)
132+
set(SHSUMKERNEL sum.S)
133+
set(SHSWAPKERNEL swap.S)
116134
endmacro ()
117135

118136
macro(SetDefaultL2)
119-
set(SGEMVNKERNEL gemv_n.S)
120-
set(SGEMVTKERNEL gemv_t.S)
137+
set(SGEMVNKERNEL ../arm/gemv_n.c)
138+
set(SGEMVTKERNEL ../arm/gemv_t.c)
121139
set(DGEMVNKERNEL gemv_n.S)
122140
set(DGEMVTKERNEL gemv_t.S)
123141
set(CGEMVNKERNEL zgemv_n.S)
@@ -161,11 +179,28 @@ macro(SetDefaultL2)
161179
set(XHEMV_L_KERNEL ../generic/zhemv_k.c)
162180
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
163181
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
182+
set(SHGEMVNKERNEL ../arm/gemv_n.c)
183+
set(SHGEMVTKERNEL ../arm/gemv_t.c)
184+
set(SHGERKERNEL ../generic/ger.c)
185+
164186
endmacro ()
165187

166188
macro(SetDefaultL3)
167189
set(SGEADD_KERNEL ../generic/geadd.c)
168190
set(DGEADD_KERNEL ../generic/geadd.c)
169191
set(CGEADD_KERNEL ../generic/zgeadd.c)
170192
set(ZGEADD_KERNEL ../generic/zgeadd.c)
193+
set(SHGEADD_KERNEL ../generic/geadd.c)
194+
set(SHGEMMKERNEL ../generic/gemmkernel_2x2.c)
195+
set(SHGEMM_BETA ../generic/gemm_beta.c)
196+
set(SHGEMMINCOPY ../generic/gemm_ncopy_2.c)
197+
set(SHGEMMITCOPY ../generic/gemm_tcopy_2.c)
198+
set(SHGEMMONCOPY ../generic/gemm_ncopy_2.c)
199+
set(SHGEMMOTCOPY ../generic/gemm_tcopy_2.c)
200+
set(SHGEMMINCOPYOBJ shgemm_incopy.o)
201+
set(SHGEMMITCOPYOBJ shgemm_itcopy.o)
202+
set(SHGEMMONCOPYOBJ shgemm_oncopy.o)
203+
set(SHGEMMOTCOPYOBJ shgemm_otcopy.o)
204+
205+
171206
endmacro ()

cmake/utils.cmake

+7
Original file line number | Diff line number | Diff line change
@@ -163,6 +163,7 @@ function(GenerateNamedObjects sources_in)
163163
if (complex_only)
164164
list(REMOVE_ITEM float_list "SINGLE")
165165
list(REMOVE_ITEM float_list "DOUBLE")
166+
list(REMOVE_ITEM float_list "HALF")
166167
elseif (real_only)
167168
list(REMOVE_ITEM float_list "COMPLEX")
168169
list(REMOVE_ITEM float_list "ZCOMPLEX")
@@ -176,6 +177,9 @@ function(GenerateNamedObjects sources_in)
176177
if (NOT no_float_type)
177178
string(SUBSTRING ${float_type} 0 1 float_char)
178179
string(TOLOWER ${float_char} float_char)
180+
if (${float_type} STREQUAL "HALF")
181+
set (float_char "sh")
182+
endif ()
179183
endif ()
180184

181185
if (NOT name_in)
@@ -210,6 +214,9 @@ function(GenerateNamedObjects sources_in)
210214
if (${float_type} STREQUAL "DOUBLE" OR ${float_type} STREQUAL "ZCOMPLEX")
211215
list(APPEND obj_defines "DOUBLE")
212216
endif ()
217+
if (${float_type} STREQUAL "HALF")
218+
list(APPEND obj_defines "HALF")
219+
endif ()
213220
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
214221
list(APPEND obj_defines "COMPLEX")
215222
if (mangle_complex_sources)

common_macro.h

+210-3
Original file line number | Diff line number | Diff line change
@@ -646,6 +646,19 @@
646646

647647
#elif defined(HALF)
648648

649+
#define AXPYU_K SAXPYU_K
650+
#define AXPYC_K SAXPYC_K
651+
#define SCAL_K SSCAL_K
652+
#define GEMV_N SGEMV_N
653+
#define GEMV_T SGEMV_T
654+
#define SYMV_U SSYMV_U
655+
#define SYMV_L SSYMV_L
656+
#define GERU_K SGERU_K
657+
#define GERC_K SGERC_K
658+
#define GERV_K SGERV_K
659+
#define GERD_K SGERD_K
660+
#define SYMV_THREAD_U SSYMV_THREAD_U
661+
#define SYMV_THREAD_L SSYMV_THREAD_L
649662
#define GEMM_BETA SHGEMM_BETA
650663
#define GEMM_KERNEL_N SHGEMM_KERNEL
651664
#define GEMM_KERNEL_L SHGEMM_KERNEL
@@ -672,6 +685,20 @@
672685
#define GEMM_OTCOPY SHGEMM_OTCOPY
673686
#define GEMM_INCOPY SHGEMM_INCOPY
674687
#define GEMM_ITCOPY SHGEMM_ITCOPY
688+
#define SYMM_THREAD_LU SSYMM_THREAD_LU
689+
#define SYMM_THREAD_LL SSYMM_THREAD_LL
690+
#define SYMM_THREAD_RU SSYMM_THREAD_RU
691+
#define SYMM_THREAD_RL SSYMM_THREAD_RL
692+
#define SYMM_LU SSYMM_LU
693+
#define SYMM_LL SSYMM_LL
694+
#define SYMM_RU SSYMM_RU
695+
#define SYMM_RL SSYMM_RL
696+
697+
698+
#define HEMM_THREAD_LU SHEMM_THREAD_LU
699+
#define HEMM_THREAD_LL SHEMM_THREAD_LL
700+
#define HEMM_THREAD_RU SHEMM_THREAD_RU
701+
#define HEMM_THREAD_RL SHEMM_THREAD_RL
675702

676703
#define GEMM_THREAD_NN SHGEMM_THREAD_NN
677704
#define GEMM_THREAD_CN SHGEMM_THREAD_TN
@@ -690,6 +717,186 @@
690717
#define GEMM_THREAD_RC SHGEMM_THREAD_NT
691718
#define GEMM_THREAD_RR SHGEMM_THREAD_NN
692719

720+
#ifdef UNIT
721+
722+
#define TRMM_OUNCOPY STRMM_OUNUCOPY
723+
#define TRMM_OUTCOPY STRMM_OUTUCOPY
724+
#define TRMM_OLNCOPY STRMM_OLNUCOPY
725+
#define TRMM_OLTCOPY STRMM_OLTUCOPY
726+
#define TRSM_OUNCOPY STRSM_OUNUCOPY
727+
#define TRSM_OUTCOPY STRSM_OUTUCOPY
728+
#define TRSM_OLNCOPY STRSM_OLNUCOPY
729+
#define TRSM_OLTCOPY STRSM_OLTUCOPY
730+
731+
#define TRMM_IUNCOPY STRMM_IUNUCOPY
732+
#define TRMM_IUTCOPY STRMM_IUTUCOPY
733+
#define TRMM_ILNCOPY STRMM_ILNUCOPY
734+
#define TRMM_ILTCOPY STRMM_ILTUCOPY
735+
#define TRSM_IUNCOPY STRSM_IUNUCOPY
736+
#define TRSM_IUTCOPY STRSM_IUTUCOPY
737+
#define TRSM_ILNCOPY STRSM_ILNUCOPY
738+
#define TRSM_ILTCOPY STRSM_ILTUCOPY
739+
740+
#else
741+
742+
#define TRMM_OUNCOPY STRMM_OUNNCOPY
743+
#define TRMM_OUTCOPY STRMM_OUTNCOPY
744+
#define TRMM_OLNCOPY STRMM_OLNNCOPY
745+
#define TRMM_OLTCOPY STRMM_OLTNCOPY
746+
#define TRSM_OUNCOPY STRSM_OUNNCOPY
747+
#define TRSM_OUTCOPY STRSM_OUTNCOPY
748+
#define TRSM_OLNCOPY STRSM_OLNNCOPY
749+
#define TRSM_OLTCOPY STRSM_OLTNCOPY
750+
751+
#define TRMM_IUNCOPY STRMM_IUNNCOPY
752+
#define TRMM_IUTCOPY STRMM_IUTNCOPY
753+
#define TRMM_ILNCOPY STRMM_ILNNCOPY
754+
#define TRMM_ILTCOPY STRMM_ILTNCOPY
755+
#define TRSM_IUNCOPY STRSM_IUNNCOPY
756+
#define TRSM_IUTCOPY STRSM_IUTNCOPY
757+
#define TRSM_ILNCOPY STRSM_ILNNCOPY
758+
#define TRSM_ILTCOPY STRSM_ILTNCOPY
759+
760+
#define TRMM_KERNEL_LN STRMM_KERNEL_LN
761+
#define TRMM_KERNEL_LT STRMM_KERNEL_LT
762+
#define TRMM_KERNEL_LR STRMM_KERNEL_LN
763+
#define TRMM_KERNEL_LC STRMM_KERNEL_LT
764+
#define TRMM_KERNEL_RN STRMM_KERNEL_RN
765+
#define TRMM_KERNEL_RT STRMM_KERNEL_RT
766+
#define TRMM_KERNEL_RR STRMM_KERNEL_RN
767+
#define TRMM_KERNEL_RC STRMM_KERNEL_RT
768+
769+
#define TRSM_KERNEL_LN STRSM_KERNEL_LN
770+
#define TRSM_KERNEL_LT STRSM_KERNEL_LT
771+
#define TRSM_KERNEL_LR STRSM_KERNEL_LN
772+
#define TRSM_KERNEL_LC STRSM_KERNEL_LT
773+
#define TRSM_KERNEL_RN STRSM_KERNEL_RN
774+
#define TRSM_KERNEL_RT STRSM_KERNEL_RT
775+
#define TRSM_KERNEL_RR STRSM_KERNEL_RN
776+
#define TRSM_KERNEL_RC STRSM_KERNEL_RT
777+
778+
#define SYMM_IUTCOPY SSYMM_IUTCOPY
779+
#define SYMM_ILTCOPY SSYMM_ILTCOPY
780+
#define SYMM_OUTCOPY SSYMM_OUTCOPY
781+
#define SYMM_OLTCOPY SSYMM_OLTCOPY
782+
#define TRMM_LNUU STRMM_LNUU
783+
#define TRMM_LNUN STRMM_LNUN
784+
#define TRMM_LNLU STRMM_LNLU
785+
#define TRMM_LNLN STRMM_LNLN
786+
#define TRMM_LTUU STRMM_LTUU
787+
#define TRMM_LTUN STRMM_LTUN
788+
#define TRMM_LTLU STRMM_LTLU
789+
#define TRMM_LTLN STRMM_LTLN
790+
#define TRMM_LRUU STRMM_LNUU
791+
#define TRMM_LRUN STRMM_LNUN
792+
#define TRMM_LRLU STRMM_LNLU
793+
#define TRMM_LRLN STRMM_LNLN
794+
#define TRMM_LCUU STRMM_LTUU
795+
#define TRMM_LCUN STRMM_LTUN
796+
#define TRMM_LCLU STRMM_LTLU
797+
#define TRMM_LCLN STRMM_LTLN
798+
#define TRMM_RNUU STRMM_RNUU
799+
#define TRMM_RNUN STRMM_RNUN
800+
#define TRMM_RNLU STRMM_RNLU
801+
#define TRMM_RNLN STRMM_RNLN
802+
#define TRMM_RTUU STRMM_RTUU
803+
#define TRMM_RTUN STRMM_RTUN
804+
#define TRMM_RTLU STRMM_RTLU
805+
#define TRMM_RTLN STRMM_RTLN
806+
#define TRMM_RRUU STRMM_RNUU
807+
#define TRMM_RRUN STRMM_RNUN
808+
#define TRMM_RRLU STRMM_RNLU
809+
#define TRMM_RRLN STRMM_RNLN
810+
#define TRMM_RCUU STRMM_RTUU
811+
#define TRMM_RCUN STRMM_RTUN
812+
#define TRMM_RCLU STRMM_RTLU
813+
#define TRMM_RCLN STRMM_RTLN
814+
815+
#define TRSM_LNUU STRSM_LNUU
816+
#define TRSM_LNUN STRSM_LNUN
817+
#define TRSM_LNLU STRSM_LNLU
818+
#define TRSM_LNLN STRSM_LNLN
819+
#define TRSM_LTUU STRSM_LTUU
820+
#define TRSM_LTUN STRSM_LTUN
821+
#define TRSM_LTLU STRSM_LTLU
822+
#define TRSM_LTLN STRSM_LTLN
823+
#define TRSM_LRUU STRSM_LNUU
824+
#define TRSM_LRUN STRSM_LNUN
825+
#define TRSM_LRLU STRSM_LNLU
826+
#define TRSM_LRLN STRSM_LNLN
827+
#define TRSM_LCUU STRSM_LTUU
828+
#define TRSM_LCUN STRSM_LTUN
829+
#define TRSM_LCLU STRSM_LTLU
830+
#define TRSM_LCLN STRSM_LTLN
831+
#define TRSM_RNUU STRSM_RNUU
832+
#define TRSM_RNUN STRSM_RNUN
833+
#define TRSM_RNLU STRSM_RNLU
834+
#define TRSM_RNLN STRSM_RNLN
835+
#define TRSM_RTUU STRSM_RTUU
836+
#define TRSM_RTUN STRSM_RTUN
837+
#define TRSM_RTLU STRSM_RTLU
838+
#define TRSM_RTLN STRSM_RTLN
839+
#define TRSM_RRUU STRSM_RNUU
840+
#define TRSM_RRUN STRSM_RNUN
841+
#define TRSM_RRLU STRSM_RNLU
842+
#define TRSM_RRLN STRSM_RNLN
843+
#define TRSM_RCUU STRSM_RTUU
844+
#define TRSM_RCUN STRSM_RTUN
845+
#define TRSM_RCLU STRSM_RTLU
846+
#define TRSM_RCLN STRSM_RTLN
847+
#define SYRK_UN SSYRK_UN
848+
#define SYRK_UT SSYRK_UT
849+
#define SYRK_LN SSYRK_LN
850+
#define SYRK_LT SSYRK_LT
851+
#define SYRK_UR SSYRK_UN
852+
#define SYRK_UC SSYRK_UT
853+
#define SYRK_LR SSYRK_LN
854+
#define SYRK_LC SSYRK_LT
855+
856+
#define SYRK_KERNEL_U SSYRK_KERNEL_U
857+
#define SYRK_KERNEL_L SSYRK_KERNEL_L
858+
859+
#define HERK_UN SSYRK_UN
860+
#define HERK_LN SSYRK_LN
861+
#define HERK_UC SSYRK_UT
862+
#define HERK_LC SSYRK_LT
863+
864+
#define HER2K_UN SSYR2K_UN
865+
#define HER2K_LN SSYR2K_LN
866+
#define HER2K_UC SSYR2K_UT
867+
#define HER2K_LC SSYR2K_LT
868+
869+
#define SYR2K_UN SSYR2K_UN
870+
#define SYR2K_UT SSYR2K_UT
871+
#define SYR2K_LN SSYR2K_LN
872+
#define SYR2K_LT SSYR2K_LT
873+
#define SYR2K_UR SSYR2K_UN
874+
#define SYR2K_UC SSYR2K_UT
875+
#define SYR2K_LR SSYR2K_LN
876+
#define SYR2K_LC SSYR2K_LT
877+
878+
#define SYR2K_KERNEL_U SSYR2K_KERNEL_U
879+
#define SYR2K_KERNEL_L SSYR2K_KERNEL_L
880+
#define SYRK_THREAD_UN SSYRK_THREAD_UN
881+
#define SYRK_THREAD_UT SSYRK_THREAD_UT
882+
#define SYRK_THREAD_LN SSYRK_THREAD_LN
883+
#define SYRK_THREAD_LT SSYRK_THREAD_LT
884+
#define SYRK_THREAD_UR SSYRK_THREAD_UR
885+
#define SYRK_THREAD_UC SSYRK_THREAD_UC
886+
#define SYRK_THREAD_LR SSYRK_THREAD_LN
887+
#define SYRK_THREAD_LC SSYRK_THREAD_LT
888+
889+
#define HERK_THREAD_UN SSYRK_THREAD_UN
890+
#define HERK_THREAD_UT SSYRK_THREAD_UT
891+
#define HERK_THREAD_LN SSYRK_THREAD_LN
892+
#define HERK_THREAD_LT SSYRK_THREAD_LT
893+
#define HERK_THREAD_UR SSYRK_THREAD_UR
894+
#define HERK_THREAD_UC SSYRK_THREAD_UC
895+
#define HERK_THREAD_LR SSYRK_THREAD_LN
896+
#define HERK_THREAD_LC SSYRK_THREAD_LT
897+
898+
#endif
899+
693900
#else
694901

695902
#define AMAX_K SAMAX_K
@@ -721,14 +928,14 @@
721928
#define GEMV_S SGEMV_S
722929
#define GEMV_D SGEMV_D
723930

931+
932+
#define SYMV_U SSYMV_U
933+
#define SYMV_L SSYMV_L
724934
#define GERU_K SGERU_K
725935
#define GERC_K SGERC_K
726936
#define GERV_K SGERV_K
727937
#define GERD_K SGERD_K
728938

729-
#define SYMV_U SSYMV_U
730-
#define SYMV_L SSYMV_L
731-
732939
#define SYMV_THREAD_U SSYMV_THREAD_U
733940
#define SYMV_THREAD_L SSYMV_THREAD_L
734941

ctest/CMakeLists.txt

+3
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,9 @@ FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh
1212
foreach(float_type ${FLOAT_TYPES})
1313
string(SUBSTRING ${float_type} 0 1 float_char_upper)
1414
string(TOLOWER ${float_char_upper} float_char)
15+
if (${float_char} STREQUAL "h")
16+
continue()
17+
endif()
1518
#level1
1619
add_executable(x${float_char}cblat1
1720
c_${float_char}blat1.f

0 commit comments

Comments
 (0)