|
| 1 | +/*************************************************************************** |
| 2 | +Copyright (c) 2013-2016, The OpenBLAS Project |
| 3 | +All rights reserved. |
| 4 | +Redistribution and use in source and binary forms, with or without |
| 5 | +modification, are permitted provided that the following conditions are |
| 6 | +met: |
| 7 | +1. Redistributions of source code must retain the above copyright |
| 8 | +notice, this list of conditions and the following disclaimer. |
| 9 | +2. Redistributions in binary form must reproduce the above copyright |
| 10 | +notice, this list of conditions and the following disclaimer in |
| 11 | +the documentation and/or other materials provided with the |
| 12 | +distribution. |
| 13 | +3. Neither the name of the OpenBLAS project nor the names of |
| 14 | +its contributors may be used to endorse or promote products |
| 15 | +derived from this software without specific prior written permission. |
| 16 | +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE |
| 20 | +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 21 | +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| 22 | +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| 23 | +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| 24 | +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE |
| 25 | +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 26 | +*****************************************************************************/ |
| 27 | + |
| 28 | +/************************************************************************************** |
| 29 | +* 2016/04/21 Werner Saar (wernsaar@googlemail.com) |
| 30 | +* BLASTEST : OK |
| 31 | +* CTEST : OK |
| 32 | +* TEST : OK |
| 33 | +* LAPACK-TEST : OK |
| 34 | +**************************************************************************************/ |
| 35 | + |
| 36 | +/*********************************************************************/ |
| 37 | +/* Copyright 2009, 2010 The University of Texas at Austin. */ |
| 38 | +/* All rights reserved. */ |
| 39 | +/* */ |
| 40 | +/* Redistribution and use in source and binary forms, with or */ |
| 41 | +/* without modification, are permitted provided that the following */ |
| 42 | +/* conditions are met: */ |
| 43 | +/* */ |
| 44 | +/* 1. Redistributions of source code must retain the above */ |
| 45 | +/* copyright notice, this list of conditions and the following */ |
| 46 | +/* disclaimer. */ |
| 47 | +/* */ |
| 48 | +/* 2. Redistributions in binary form must reproduce the above */ |
| 49 | +/* copyright notice, this list of conditions and the following */ |
| 50 | +/* disclaimer in the documentation and/or other materials */ |
| 51 | +/* provided with the distribution. */ |
| 52 | +/* */ |
| 53 | +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ |
| 54 | +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ |
| 55 | +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ |
| 56 | +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ |
| 57 | +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ |
| 58 | +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ |
| 59 | +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ |
| 60 | +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ |
| 61 | +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ |
| 62 | +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ |
| 63 | +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ |
| 64 | +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ |
| 65 | +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ |
| 66 | +/* POSSIBILITY OF SUCH DAMAGE. */ |
| 67 | +/* */ |
| 68 | +/* The views and conclusions contained in the software and */ |
| 69 | +/* documentation are those of the authors and should not be */ |
| 70 | +/* interpreted as representing official policies, either expressed */ |
| 71 | +/* or implied, of The University of Texas at Austin. */ |
| 72 | +/*********************************************************************/ |
| 73 | + |
| 74 | +#define ASSEMBLER |
| 75 | +#include "common.h" |
| 76 | +#include "def_vsx.h" |
| 77 | + /* Register aliases and constants shared by this file and the two included power8 tcopy fragments. */ |
| 78 | +#define M r3 /* tested for <= 0 at entry; multiplied into the B8/B4/B2/B1 offsets */ |
| 79 | +#define N r4 /* tested for <= 0 at entry; masked with -16/-8/-4/-2 to form those offsets */ |
| 80 | +#define A r5 /* not referenced in this file's own code; presumably the source pointer -- TODO confirm in the logic file */ |
| 81 | +#define LDA r6 /* shifted left by BASE_SHIFT at entry */ |
| 82 | +#define B r7 /* base address added to the B8/B4/B2/B1 offsets */ |
| 83 | + /* A0-A3 are not referenced in this file's own code; presumably used by the included fragments -- TODO confirm. */ |
| 84 | +#define A0 r8 |
| 85 | +#define A1 r9 |
| 86 | +#define A2 r10 |
| 87 | +#define A3 r11 |
| 88 | + /* J is not referenced in this file's own code. */ |
| 89 | +#define J r12 |
| 90 | + /* The aliases below live in r14-r31, which the routine saves on entry and restores before returning. */ |
| 91 | +#define PREA r14 /* first holds the -4 mask, then the constant 768 */ |
| 92 | +#define PREB r15 /* first holds the -2 mask, then M16 + 128 */ |
| 93 | +#define BO r16 /* not referenced in this file's own code */ |
| 94 | +#define B8 r17 /* B + (((N & -16) * M) << BASE_SHIFT) */ |
| 95 | +#define B4 r18 /* B + (((N & -8) * M) << BASE_SHIFT) */ |
| 96 | +#define B2 r19 /* B + (((N & -4) * M) << BASE_SHIFT) */ |
| 97 | +#define B1 r20 /* B + (((N & -2) * M) << BASE_SHIFT) */ |
| 98 | +#define o8 r21 /* loaded with the constant 8 */ |
| 99 | +#define T2 r22 /* holds the -8 mask */ |
| 100 | +#define I r23 /* not referenced in this file's own code */ |
| 101 | +#define o16 r24 /* loaded with the constant 16 */ |
| 102 | +#define o32 r25 /* loaded with the constant 32 */ |
| 103 | +#define o48 r26 /* loaded with the constant 48 */ |
| 104 | +#define B16 r29 /* not referenced in this file's own code */ |
| 105 | +#define M16 r30 /* M << (4 + BASE_SHIFT) */ |
| 106 | +#define T1 r31 /* holds the -16 mask */ |
| 107 | + |
| 108 | +#define o0 0 /* a literal zero, unlike o8/o16/o32/o48 which are registers */ |
| 109 | + /* The macro file is included after the aliases above are defined; presumably it relies on them -- TODO confirm. */ |
| 110 | +#include "dgemm_tcopy_macros_16_power8.S" |
| 111 | + |
| 112 | +#define STACKSIZE 384 /* bytes subtracted from SP on entry and added back before return */ |
| 113 | + /* Routine body: save r14-r31, derive pointers and constants, run the included copy logic, restore r14-r31, return 0 in r3. */ |
| 114 | + /* PROLOGUE, PROFCODE and EPILOGUE are macros defined outside this file (presumably via common.h). */ |
| 115 | + PROLOGUE |
| 116 | + PROFCODE |
| 117 | + |
| 118 | + addi SP, SP, -STACKSIZE /* open a STACKSIZE-byte frame */ |
| 119 | + li r0, 0 /* r0 = 0; not used again in this file's own code */ |
| 120 | + /* Save r14-r31 at SP+144 .. SP+280; r27 and r28 have no alias in this file but are saved as well. */ |
| 121 | + std r31, 144(SP) |
| 122 | + std r30, 152(SP) |
| 123 | + std r29, 160(SP) |
| 124 | + std r28, 168(SP) |
| 125 | + std r27, 176(SP) |
| 126 | + std r26, 184(SP) |
| 127 | + std r25, 192(SP) |
| 128 | + std r24, 200(SP) |
| 129 | + std r23, 208(SP) |
| 130 | + std r22, 216(SP) |
| 131 | + std r21, 224(SP) |
| 132 | + std r20, 232(SP) |
| 133 | + std r19, 240(SP) |
| 134 | + std r18, 248(SP) |
| 135 | + std r17, 256(SP) |
| 136 | + std r16, 264(SP) |
| 137 | + std r15, 272(SP) |
| 138 | + std r14, 280(SP) |
| 139 | + /* Skip to the exit when M <= 0 or N <= 0. NOTE(review): cmpwi is a 32-bit signed compare -- confirm the argument width. */ |
| 140 | + cmpwi cr0, M, 0 |
| 141 | + ble- L999 |
| 142 | + cmpwi cr0, N, 0 |
| 143 | + ble- L999 |
| 144 | + /* Scale LDA by BASE_SHIFT (defined outside this file; presumably log2 of the element size) and form M16 = M << (4 + BASE_SHIFT). */ |
| 145 | + slwi LDA, LDA, BASE_SHIFT |
| 146 | + slwi M16, M, 4 + BASE_SHIFT |
| 147 | + /* Load the masks -16, -8, -4 and -2; PREA and PREB are only scratch here and are overwritten further down. */ |
| 148 | + li T1, -16 |
| 149 | + li T2, -8 |
| 150 | + li PREA, -4 |
| 151 | + li PREB, -2 |
| 152 | + /* Clear the low 4, 3, 2 and 1 bits of N, i.e. round N down to a multiple of 16, 8, 4 and 2. */ |
| 153 | + and B8, N, T1 |
| 154 | + and B4, N, T2 |
| 155 | + and B2, N, PREA |
| 156 | + and B1, N, PREB |
| 157 | + /* Multiply each rounded N by M. NOTE(review): mullw and slwi use only the low 32 bits -- confirm the sizes stay in range. */ |
| 158 | + mullw B8, B8, M |
| 159 | + mullw B4, B4, M |
| 160 | + mullw B2, B2, M |
| 161 | + mullw B1, B1, M |
| 162 | + /* Shift each product left by BASE_SHIFT. */ |
| 163 | + slwi B8, B8, BASE_SHIFT |
| 164 | + slwi B4, B4, BASE_SHIFT |
| 165 | + slwi B2, B2, BASE_SHIFT |
| 166 | + slwi B1, B1, BASE_SHIFT |
| 167 | + /* Add B so that B8, B4, B2 and B1 become addresses relative to B. */ |
| 168 | + add B8, B8, B |
| 169 | + add B4, B4, B |
| 170 | + add B2, B2, B |
| 171 | + add B1, B1, B |
| 172 | + /* PREA = 768 and PREB = M16 + 128; presumably prefetch distances consumed by the included logic -- TODO confirm. */ |
| 173 | + li PREA, 768 |
| 174 | + addi PREB, M16, 128 |
| 175 | + /* Keep the constants 8, 16, 32 and 48 in registers. */ |
| 176 | + li o8, 8 |
| 177 | + li o16, 16 |
| 178 | + li o32, 32 |
| 179 | + li o48, 48 |
| 180 | + /* The remaining work is supplied by the logic file included here; its contents are not visible in this file. */ |
| 181 | +#include "dgemm_tcopy_logic_16_power8.S" |
| 182 | + |
| 183 | +L999: /* exit path, also the target of the two early-out branches above */ |
| 184 | + |
| 185 | + li r3, 0 /* r3 = 0 before returning */ |
| 186 | + /* Restore r14-r31 from the slots they were saved to on entry. */ |
| 187 | + ld r31, 144(SP) |
| 188 | + ld r30, 152(SP) |
| 189 | + ld r29, 160(SP) |
| 190 | + ld r28, 168(SP) |
| 191 | + ld r27, 176(SP) |
| 192 | + ld r26, 184(SP) |
| 193 | + ld r25, 192(SP) |
| 194 | + ld r24, 200(SP) |
| 195 | + ld r23, 208(SP) |
| 196 | + ld r22, 216(SP) |
| 197 | + ld r21, 224(SP) |
| 198 | + ld r20, 232(SP) |
| 199 | + ld r19, 240(SP) |
| 200 | + ld r18, 248(SP) |
| 201 | + ld r17, 256(SP) |
| 202 | + ld r16, 264(SP) |
| 203 | + ld r15, 272(SP) |
| 204 | + ld r14, 280(SP) |
| 205 | + /* Release the frame opened on entry. */ |
| 206 | + addi SP, SP, STACKSIZE |
| 207 | + |
| 208 | + blr |
| 209 | + EPILOGUE |
| 210 | + |
| 211 | + |
0 commit comments