Skip to content

Commit 711ca33

Browse files
Improved Ximatcopy when lda==ldb.
The Ximatcopy functions create a copy of the input matrix even though their interface suggests that they work in place. The new XIMATCOPY_K_YY routines perform the operation truly in place when the leading dimension does not change (lda == ldb).
1 parent 40a3fed commit 711ca33

23 files changed

+1288
-3
lines changed

CONTRIBUTORS.md

+3
Original file line numberDiff line numberDiff line change
@@ -127,5 +127,8 @@ In chronological order:
127127
* Ton van den Heuvel <https://github.com/ton>
128128
* [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity().
129129

130+
* Martin Koehler <https://github.com/grisuthedragon/>
131+
* [2015-09-07] Improved imatcopy
132+
130133
* [Your name or handle] <[email or website]>
131134
* [Date] [Brief summary of your changes]

common_c.h

+19
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,15 @@
220220
#define COMATCOPY_K_CTC comatcopy_k_ctc
221221
#define COMATCOPY_K_RTC comatcopy_k_rtc
222222

223+
#define CIMATCOPY_K_CN cimatcopy_k_cn
224+
#define CIMATCOPY_K_RN cimatcopy_k_rn
225+
#define CIMATCOPY_K_CT cimatcopy_k_ct
226+
#define CIMATCOPY_K_RT cimatcopy_k_rt
227+
#define CIMATCOPY_K_CNC cimatcopy_k_cnc
228+
#define CIMATCOPY_K_RNC cimatcopy_k_rnc
229+
#define CIMATCOPY_K_CTC cimatcopy_k_ctc
230+
#define CIMATCOPY_K_RTC cimatcopy_k_rtc
231+
223232
#define CGEADD_K cgeadd_k
224233

225234
#else
@@ -403,6 +412,16 @@
403412
#define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc
404413
#define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc
405414
#define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc
415+
416+
#define CIMATCOPY_K_CN gotoblas -> cimatcopy_k_cn
417+
#define CIMATCOPY_K_RN gotoblas -> cimatcopy_k_rn
418+
#define CIMATCOPY_K_CT gotoblas -> cimatcopy_k_ct
419+
#define CIMATCOPY_K_RT gotoblas -> cimatcopy_k_rt
420+
#define CIMATCOPY_K_CNC gotoblas -> cimatcopy_k_cnc
421+
#define CIMATCOPY_K_RNC gotoblas -> cimatcopy_k_rnc
422+
#define CIMATCOPY_K_CTC gotoblas -> cimatcopy_k_ctc
423+
#define CIMATCOPY_K_RTC gotoblas -> cimatcopy_k_rtc
424+
406425
#define CGEADD_K gotoblas -> cgeadd_k
407426

408427
#endif

common_d.h

+9
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,11 @@
149149
#define DOMATCOPY_K_RN domatcopy_k_rn
150150
#define DOMATCOPY_K_CT domatcopy_k_ct
151151
#define DOMATCOPY_K_RT domatcopy_k_rt
152+
153+
#define DIMATCOPY_K_CN dimatcopy_k_cn
154+
#define DIMATCOPY_K_RN dimatcopy_k_rn
155+
#define DIMATCOPY_K_CT dimatcopy_k_ct
156+
#define DIMATCOPY_K_RT dimatcopy_k_rt
152157
#define DGEADD_K dgeadd_k
153158

154159
#else
@@ -267,6 +272,10 @@
267272
#define DOMATCOPY_K_RN gotoblas -> domatcopy_k_rn
268273
#define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct
269274
#define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt
275+
#define DIMATCOPY_K_CN gotoblas -> dimatcopy_k_cn
276+
#define DIMATCOPY_K_RN gotoblas -> dimatcopy_k_rn
277+
#define DIMATCOPY_K_CT gotoblas -> dimatcopy_k_ct
278+
#define DIMATCOPY_K_RT gotoblas -> dimatcopy_k_rt
270279

271280
#define DGEADD_K gotoblas -> dgeadd_k
272281

common_level3.h

+24
Original file line numberDiff line numberDiff line change
@@ -1736,31 +1736,55 @@ int somatcopy_k_cn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLAS
17361736
int somatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
17371737
int somatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
17381738
int somatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
1739+
int simatcopy_k_cn(BLASLONG, BLASLONG, float, float *, BLASLONG);
1740+
int simatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG);
1741+
int simatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG);
1742+
int simatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG);
17391743

17401744
int domatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
17411745
int domatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
17421746
int domatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
17431747
int domatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
1748+
int dimatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG);
1749+
int dimatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG);
1750+
int dimatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG);
1751+
int dimatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG);
17441752

17451753
int comatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
17461754
int comatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
17471755
int comatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
17481756
int comatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
1757+
int cimatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
1758+
int cimatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
1759+
int cimatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
1760+
int cimatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
17491761

17501762
int comatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
17511763
int comatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
17521764
int comatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
17531765
int comatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
1766+
int cimatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
1767+
int cimatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
1768+
int cimatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
1769+
int cimatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
17541770

17551771
int zomatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
17561772
int zomatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
17571773
int zomatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
17581774
int zomatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
1775+
int zimatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
1776+
int zimatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
1777+
int zimatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
1778+
int zimatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
17591779

17601780
int zomatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
17611781
int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
17621782
int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
17631783
int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
1784+
int zimatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
1785+
int zimatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
1786+
int zimatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
1787+
int zimatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
17641788

17651789
int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG);
17661790
int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG);

common_macro.h

+26
Original file line numberDiff line numberDiff line change
@@ -634,6 +634,11 @@
634634
#define OMATCOPY_K_RN DOMATCOPY_K_RN
635635
#define OMATCOPY_K_CT DOMATCOPY_K_CT
636636
#define OMATCOPY_K_RT DOMATCOPY_K_RT
637+
#define IMATCOPY_K_CN DIMATCOPY_K_CN
638+
#define IMATCOPY_K_RN DIMATCOPY_K_RN
639+
#define IMATCOPY_K_CT DIMATCOPY_K_CT
640+
#define IMATCOPY_K_RT DIMATCOPY_K_RT
641+
637642
#define GEADD_K DGEADD_K
638643
#else
639644

@@ -931,6 +936,10 @@
931936
#define OMATCOPY_K_RN SOMATCOPY_K_RN
932937
#define OMATCOPY_K_CT SOMATCOPY_K_CT
933938
#define OMATCOPY_K_RT SOMATCOPY_K_RT
939+
#define IMATCOPY_K_CN SIMATCOPY_K_CN
940+
#define IMATCOPY_K_RN SIMATCOPY_K_RN
941+
#define IMATCOPY_K_CT SIMATCOPY_K_CT
942+
#define IMATCOPY_K_RT SIMATCOPY_K_RT
934943

935944
#define GEADD_K SGEADD_K
936945
#endif
@@ -1747,6 +1756,15 @@
17471756
#define OMATCOPY_K_RNC ZOMATCOPY_K_RNC
17481757
#define OMATCOPY_K_CTC ZOMATCOPY_K_CTC
17491758
#define OMATCOPY_K_RTC ZOMATCOPY_K_RTC
1759+
#define IMATCOPY_K_CN ZIMATCOPY_K_CN
1760+
#define IMATCOPY_K_RN ZIMATCOPY_K_RN
1761+
#define IMATCOPY_K_CT ZIMATCOPY_K_CT
1762+
#define IMATCOPY_K_RT ZIMATCOPY_K_RT
1763+
#define IMATCOPY_K_CNC ZIMATCOPY_K_CNC
1764+
#define IMATCOPY_K_RNC ZIMATCOPY_K_RNC
1765+
#define IMATCOPY_K_CTC ZIMATCOPY_K_CTC
1766+
#define IMATCOPY_K_RTC ZIMATCOPY_K_RTC
1767+
17501768
#define GEADD_K ZGEADD_K
17511769

17521770
#else
@@ -2160,6 +2178,14 @@
21602178
#define OMATCOPY_K_RNC COMATCOPY_K_RNC
21612179
#define OMATCOPY_K_CTC COMATCOPY_K_CTC
21622180
#define OMATCOPY_K_RTC COMATCOPY_K_RTC
2181+
#define IMATCOPY_K_CN CIMATCOPY_K_CN
2182+
#define IMATCOPY_K_RN CIMATCOPY_K_RN
2183+
#define IMATCOPY_K_CT CIMATCOPY_K_CT
2184+
#define IMATCOPY_K_RT CIMATCOPY_K_RT
2185+
#define IMATCOPY_K_CNC CIMATCOPY_K_CNC
2186+
#define IMATCOPY_K_RNC CIMATCOPY_K_RNC
2187+
#define IMATCOPY_K_CTC CIMATCOPY_K_CTC
2188+
#define IMATCOPY_K_RTC CIMATCOPY_K_RTC
21632189

21642190
#define GEADD_K CGEADD_K
21652191

common_param.h

+30
Original file line numberDiff line numberDiff line change
@@ -830,31 +830,61 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
830830
int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
831831
int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
832832

833+
int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
834+
int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG);
835+
int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
836+
int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG);
837+
833838
int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
834839
int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
835840
int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
836841
int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
837842

843+
int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
844+
int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG);
845+
int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
846+
int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG);
847+
838848
int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
839849
int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
840850
int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
841851
int (*comatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
842852

853+
int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
854+
int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
855+
int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
856+
int (*cimatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
857+
843858
int (*comatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
844859
int (*comatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
845860
int (*comatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
846861
int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
847862

863+
int (*cimatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
864+
int (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
865+
int (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
866+
int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
867+
848868
int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
849869
int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
850870
int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
851871
int (*zomatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
852872

873+
int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
874+
int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
875+
int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
876+
int (*zimatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
877+
853878
int (*zomatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
854879
int (*zomatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
855880
int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
856881
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
857882

883+
int (*zimatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
884+
int (*zimatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
885+
int (*zimatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
886+
int (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
887+
858888
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
859889
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
860890
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);

common_s.h

+8
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,10 @@
152152
#define SOMATCOPY_K_RN somatcopy_k_rn
153153
#define SOMATCOPY_K_CT somatcopy_k_ct
154154
#define SOMATCOPY_K_RT somatcopy_k_rt
155+
#define SIMATCOPY_K_CN simatcopy_k_cn
156+
#define SIMATCOPY_K_RN simatcopy_k_rn
157+
#define SIMATCOPY_K_CT simatcopy_k_ct
158+
#define SIMATCOPY_K_RT simatcopy_k_rt
155159

156160
#define SGEADD_K sgeadd_k
157161

@@ -274,6 +278,10 @@
274278
#define SOMATCOPY_K_RN gotoblas -> somatcopy_k_rn
275279
#define SOMATCOPY_K_CT gotoblas -> somatcopy_k_ct
276280
#define SOMATCOPY_K_RT gotoblas -> somatcopy_k_rt
281+
#define SIMATCOPY_K_CN gotoblas -> simatcopy_k_cn
282+
#define SIMATCOPY_K_RN gotoblas -> simatcopy_k_rn
283+
#define SIMATCOPY_K_CT gotoblas -> simatcopy_k_ct
284+
#define SIMATCOPY_K_RT gotoblas -> simatcopy_k_rt
277285

278286
#define SGEADD_K gotoblas -> sgeadd_k
279287

common_z.h

+18
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,15 @@
220220
#define ZOMATCOPY_K_CTC zomatcopy_k_ctc
221221
#define ZOMATCOPY_K_RTC zomatcopy_k_rtc
222222

223+
#define ZIMATCOPY_K_CN zimatcopy_k_cn
224+
#define ZIMATCOPY_K_RN zimatcopy_k_rn
225+
#define ZIMATCOPY_K_CT zimatcopy_k_ct
226+
#define ZIMATCOPY_K_RT zimatcopy_k_rt
227+
#define ZIMATCOPY_K_CNC zimatcopy_k_cnc
228+
#define ZIMATCOPY_K_RNC zimatcopy_k_rnc
229+
#define ZIMATCOPY_K_CTC zimatcopy_k_ctc
230+
#define ZIMATCOPY_K_RTC zimatcopy_k_rtc
231+
223232
#define ZGEADD_K zgeadd_k
224233

225234
#else
@@ -404,6 +413,15 @@
404413
#define ZOMATCOPY_K_CTC gotoblas -> zomatcopy_k_ctc
405414
#define ZOMATCOPY_K_RTC gotoblas -> zomatcopy_k_rtc
406415

416+
#define ZIMATCOPY_K_CN gotoblas -> zimatcopy_k_cn
417+
#define ZIMATCOPY_K_RN gotoblas -> zimatcopy_k_rn
418+
#define ZIMATCOPY_K_CT gotoblas -> zimatcopy_k_ct
419+
#define ZIMATCOPY_K_RT gotoblas -> zimatcopy_k_rt
420+
#define ZIMATCOPY_K_CNC gotoblas -> zimatcopy_k_cnc
421+
#define ZIMATCOPY_K_RNC gotoblas -> zimatcopy_k_rnc
422+
#define ZIMATCOPY_K_CTC gotoblas -> zimatcopy_k_ctc
423+
#define ZIMATCOPY_K_RTC gotoblas -> zimatcopy_k_rtc
424+
407425
#define ZGEADD_K gotoblas -> zgeadd_k
408426

409427
#endif

interface/imatcopy.c

+33-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2626
*****************************************************************************/
2727

2828
/***********************************************************
29-
* 2014/06/10 Saar
29+
* 2014-06-10 Saar
30+
* 2015-09-07 grisuthedragon
3031
***********************************************************/
3132

3233
#include <stdio.h>
@@ -50,6 +51,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5051
#undef malloc
5152
#undef free
5253

54+
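/* Enable the new IMATCOPY code path, which operates in place when lda == ldb. NOTE(review): the transposing kernels (CT/RT) are selected on lda == ldb alone; confirm they handle non-square matrices (rows != cols). */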
55+
#define NEW_IMATCOPY
56+
5357
#ifndef CBLAS
5458
void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, blasint *ldb)
5559
{
@@ -75,7 +79,6 @@ void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha,
7579
#else
7680
void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, FLOAT calpha, FLOAT *a, blasint clda, blasint cldb)
7781
{
78-
char Order, Trans;
7982
int order=-1,trans=-1;
8083
blasint info = -1;
8184
FLOAT *b;
@@ -117,6 +120,34 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
117120
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
118121
return;
119122
}
123+
#ifdef NEW_IMATCOPY
124+
if ( *lda == *ldb ) {
125+
if ( order == BlasColMajor )
126+
{
127+
if ( trans == BlasNoTrans )
128+
{
129+
IMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda );
130+
}
131+
else
132+
{
133+
IMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda );
134+
}
135+
}
136+
else
137+
{
138+
if ( trans == BlasNoTrans )
139+
{
140+
IMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda );
141+
}
142+
else
143+
{
144+
IMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda );
145+
}
146+
}
147+
return;
148+
}
149+
150+
#endif
120151

121152
if ( *lda > *ldb )
122153
msize = (*lda) * (*ldb) * sizeof(FLOAT);

0 commit comments

Comments
 (0)