@@ -26,11 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2626*****************************************************************************/
2727
2828/**************************************************************************************
29- * 2016/03/18 Werner Saar (wernsaar@googlemail.com)
29+ * 2016/04/03 Werner Saar (wernsaar@googlemail.com)
3030* BLASTEST : OK
3131* CTEST : OK
3232* TEST : OK
33- * LAPACK-TEST : OK
33+ * LAPACK-TEST : OK
3434**************************************************************************************/
3535
3636/*********************************************************************/
@@ -130,10 +130,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
130130#endif
131131
132132#define o0 0
133- #define alpha_r vs30
134- #define alpha_i vs31
135133
136- #define TBUFFER r14
134+ #define alpha_dr vs28
135+ #define alpha_di vs29
136+ #define alpha_sr vs30
137+ #define alpha_si vs31
138+
139+
140+ #define NOTUSED r14
137141#define L r15
138142#define o12 r16
139143#define o4 r17
@@ -271,21 +275,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
271275#include "cgemm_macros_8x4_power8.S"
272276
273277 cmpwi cr0, M, 0
274- ble . L999_H1
278+ ble L999_H1
275279 cmpwi cr0, N, 0
276- ble . L999_H1
280+ ble L999_H1
277281 cmpwi cr0, K, 0
278- ble . L999_H1
282+ ble L999_H1
279283
280284 slwi LDC, LDC, ZBASE_SHIFT
281- li PRE, 256
285+ li PRE, 384
282286 li o4 , 4
283287 li o8 , 8
284288 li o12 , 12
285289 li o16 , 16
286290 li o32 , 32
287291 li o48 , 48
288- addi TBUFFER, SP, 360
289292
290293
291294#ifdef __64BIT__
@@ -294,14 +297,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
294297 addi T1 , SP, 224
295298#endif
296299
297- lxsspx alpha_r, 0 , T1
298- lxsspx alpha_i, o8, T1
300+ stxsspx vs1, 0 , T1
301+ lxsspx alpha_dr, 0 , T1
302+ stxsspx vs2, o8 , T1
303+ lxsspx alpha_di, o8, T1
304+ addi T1, SP, 360
305+ li T2, 0
306+
307+ stw T2, 0 (T1)
308+ stw T2, 4 (T1)
309+ stw T2, 8 (T1)
310+ stxsspx alpha_dr, o12, T1
311+ lxvw4x alpha_sr, o0 , T1
312+ addi T1, T1, 16
313+
314+ stw T2, 0 (T1)
315+ stw T2, 4 (T1)
316+ stw T2, 8 (T1)
317+ stxsspx alpha_di, o12, T1
318+ lxvw4x alpha_si, o0 , T1
299319
300320 .align 5
301321
302322#include "cgemm_logic_8x4_power8.S"
303323
304- . L999:
324+ L999:
305325 addi r3, 0 , 0
306326
307327 lfd f14, 0 (SP)
0 commit comments