1- /***************************************************************************
2- Copyright (c) 2013-2016, The OpenBLAS Project
3- All rights reserved.
4- Redistribution and use in source and binary forms, with or without
5- modification, are permitted provided that the following conditions are
6- met:
7- 1. Redistributions of source code must retain the above copyright
8- notice, this list of conditions and the following disclaimer.
9- 2. Redistributions in binary form must reproduce the above copyright
10- notice, this list of conditions and the following disclaimer in
11- the documentation and/or other materials provided with the
12- distribution.
13- 3. Neither the name of the OpenBLAS project nor the names of
14- its contributors may be used to endorse or promote products
15- derived from this software without specific prior written permission.
16- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19- ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25- USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26- *****************************************************************************/
27-
28- /**************************************************************************************
29- * 2016/03/05 Werner Saar (wernsaar@googlemail.com)
30- * BLASTEST : OK
31- * CTEST : OK
32- * TEST : OK
33- * LAPACK-TEST : OK
34- **************************************************************************************/
35-
361/*********************************************************************/
372/* Copyright 2009, 2010 The University of Texas at Austin. */
383/* All rights reserved. */
@@ -82,7 +47,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8247#endif
8348
8449#ifdef __64BIT__
85- #define STACKSIZE 320
50+ #define STACKSIZE 32000
8651#define ALPHA_R_SP 296 (SP)
8752#define ALPHA_I_SP 304 (SP)
8853#define FZERO 312 (SP)
@@ -133,11 +98,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13398#define alpha_r vs30
13499#define alpha_i vs31
135100
101+
102+ #define FRAMEPOINTER r12
103+
104+ #define BBUFFER r14
105+
136106#define L r15
137107#define ALPHA r16
138108#define o24 r17
139109#define T2 r19
140- #define KK r20
110+ #define BBO r20
141111#define o8 r21
142112#define I r22
143113#define J r23
@@ -156,8 +126,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
156126 PROLOGUE
157127 PROFCODE
158128
159- addi SP, SP, -STACKSIZE
160- li r0, 0
129+ mr FRAMEPOINTER, SP
130+ addi SP, SP, -STACKSIZE
131+ addi SP, SP, -STACKSIZE
132+ addi SP, SP, -STACKSIZE
133+ addi SP, SP, -STACKSIZE
134+ li r0, 0
161135
162136 stfd f14, 0 (SP)
163137 stfd f15, 8 (SP)
@@ -200,6 +174,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
200174 std r17, 256 (SP)
201175 std r16, 264 (SP)
202176 std r15, 272 (SP)
177+ std r14, 280 (SP)
203178#else
204179 stw r31, 144 (SP)
205180 stw r30, 148 (SP)
@@ -226,37 +201,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
226201
227202#ifdef linux
228203#ifdef __64BIT__
229- ld LDC, FRAMESLOT(0 ) + STACKSIZE(SP )
204+ ld LDC, FRAMESLOT(0 ) + 0 (FRAMEPOINTER )
230205#endif
231206#endif
232207
233208#if defined(_AIX) || defined(__APPLE__)
234209#ifdef __64BIT__
235- ld LDC, FRAMESLOT(0 ) + STACKSIZE(SP )
210+ ld LDC, FRAMESLOT(0 ) + 0 (FRAMEPOINTER )
236211#else
237212#ifdef DOUBLE
238- lwz B, FRAMESLOT(0 ) + STACKSIZE(SP )
239- lwz C, FRAMESLOT(1 ) + STACKSIZE(SP )
240- lwz LDC, FRAMESLOT(2 ) + STACKSIZE(SP )
213+ lwz B, FRAMESLOT(0 ) + 0 (FRAMEPOINTER )
214+ lwz C, FRAMESLOT(1 ) + 0 (FRAMEPOINTER )
215+ lwz LDC, FRAMESLOT(2 ) + 0 (FRAMEPOINTER )
241216#else
242- lwz LDC, FRAMESLOT(0 ) + STACKSIZE(SP )
217+ lwz LDC, FRAMESLOT(0 ) + 0 (FRAMEPOINTER )
243218#endif
244219#endif
245220#endif
246221
247222#ifdef TRMMKERNEL
248223#if defined(linux) && defined(__64BIT__)
249- ld OFFSET, FRAMESLOT(1 ) + STACKSIZE(SP )
224+ ld OFFSET, FRAMESLOT(1 ) + 0 (FRAMEPOINTER )
250225#endif
251226
252227#if defined(_AIX) || defined(__APPLE__)
253228#ifdef __64BIT__
254- ld OFFSET, FRAMESLOT(1 ) + STACKSIZE(SP )
229+ ld OFFSET, FRAMESLOT(1 ) + 0 (FRAMEPOINTER )
255230#else
256231#ifdef DOUBLE
257- lwz OFFSET, FRAMESLOT(3 ) + STACKSIZE(SP )
232+ lwz OFFSET, FRAMESLOT(3 ) + 0 (FRAMEPOINTER )
258233#else
259- lwz OFFSET, FRAMESLOT(1 ) + STACKSIZE(SP )
234+ lwz OFFSET, FRAMESLOT(1 ) + 0 (FRAMEPOINTER )
260235#endif
261236#endif
262237#endif
@@ -268,34 +243,38 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
268243#include "zgemm_macros_8x2_power8.S"
269244
270245 cmpwi cr0, M, 0
271- ble . L999
246+ ble L999
272247 cmpwi cr0, N, 0
273- ble . L999
248+ ble L999
274249 cmpwi cr0, K, 0
275- ble . L999
250+ ble L999
276251
277252 slwi LDC, LDC, ZBASE_SHIFT
278- li PRE, 256
253+ li PRE, 384
279254 li o8 , 8
280255 li o16 , 16
281256 li o24 , 24
282257 li o32 , 32
283258 li o48 , 48
284259
260+ addi BBUFFER, SP, 512 +4096
261+ li T1, -4096
262+ and BBUFFER, BBUFFER, T1
263+
285264#ifdef __64BIT__
286265 addi ALPHA, SP, 296
287266#else
288267 addi ALPHA, SP, 224
289268#endif
290269
291- lxvdsx alpha_r, 0 , ALPHA
292- lxvdsx alpha_i, o8, ALPHA
270+ lxsdx alpha_r, 0 , ALPHA
271+ lxsdx alpha_i, o8, ALPHA
293272
294- .align 5
273+ .align 4
295274
296275#include "zgemm_logic_8x2_power8.S"
297276
298- . L999:
277+ L999:
299278 addi r3, 0 , 0
300279
301280 lfd f14, 0 (SP)
@@ -339,6 +318,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
339318 ld r17, 256 (SP)
340319 ld r16, 264 (SP)
341320 ld r15, 272 (SP)
321+ ld r14, 280 (SP)
342322#else
343323 lwz r31, 144 (SP)
344324 lwz r30, 148 (SP)
@@ -360,6 +340,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
360340#endif
361341
362342 addi SP, SP, STACKSIZE
343+ addi SP, SP, STACKSIZE
344+ addi SP, SP, STACKSIZE
345+ addi SP, SP, STACKSIZE
363346
364347 blr
365348
0 commit comments