Skip to content

Commit e173039

Browse files
committed
Merge pull request #802 from ashwinyes/develop_20160314_dgemm_optimization
DGEMM Optimizations for Cortex-A57
2 parents f922627 + cf8c7e2 commit e173039

13 files changed

Lines changed: 21409 additions & 190 deletions

CONTRIBUTORS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,5 +141,11 @@ In chronological order:
141141
* Martin Koehler <https://github.com/grisuthedragon/>
142142
* [2015-09-07] Improved imatcopy
143143

144+
* Ashwin Sekhar T K <https://github.com/ashwinyes/>
145+
* [2015-11-09] Assembly kernels for Cortex-A57 (ARMv8)
146+
* [2015-11-20] lapack-test fixes for Cortex-A57
147+
* [2016-03-14] Additional functional Assembly Kernels for Cortex-A57
148+
* [2016-03-14] Optimize Dgemm 4x4 for Cortex-A57
149+
144150
* [Your name or handle] <[email or website]>
145151
* [Date] [Brief summary of your changes]

kernel/arm64/KERNEL.CORTEXA57

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -60,32 +60,55 @@ DGEMVTKERNEL = gemv_t.S
6060
CGEMVTKERNEL = zgemv_t.S
6161
ZGEMVTKERNEL = zgemv_t.S
6262

63-
STRMMKERNEL = strmm_kernel_4x4.S
64-
DTRMMKERNEL = dtrmm_kernel_4x4.S
65-
CTRMMKERNEL = ctrmm_kernel_4x4.S
66-
ZTRMMKERNEL = ztrmm_kernel_4x4.S
67-
68-
SGEMMKERNEL = sgemm_kernel_4x4.S
69-
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
70-
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
63+
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
64+
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
65+
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
66+
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
67+
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
68+
SGEMMINCOPYOBJ = sgemm_incopy.o
69+
SGEMMITCOPYOBJ = sgemm_itcopy.o
70+
endif
71+
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
72+
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
7173
SGEMMONCOPYOBJ = sgemm_oncopy.o
7274
SGEMMOTCOPYOBJ = sgemm_otcopy.o
7375

74-
DGEMMKERNEL = dgemm_kernel_4x4.S
75-
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
76-
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
76+
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
77+
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
78+
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
79+
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
80+
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
81+
DGEMMINCOPYOBJ = dgemm_incopy.o
82+
DGEMMITCOPYOBJ = dgemm_itcopy.o
83+
endif
84+
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
85+
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
7786
DGEMMONCOPYOBJ = dgemm_oncopy.o
7887
DGEMMOTCOPYOBJ = dgemm_otcopy.o
7988

80-
CGEMMKERNEL = cgemm_kernel_4x4.S
81-
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
82-
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
89+
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
90+
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
91+
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
92+
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
93+
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
94+
CGEMMINCOPYOBJ = cgemm_incopy.o
95+
CGEMMITCOPYOBJ = cgemm_itcopy.o
96+
endif
97+
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
98+
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
8399
CGEMMONCOPYOBJ = cgemm_oncopy.o
84100
CGEMMOTCOPYOBJ = cgemm_otcopy.o
85101

86-
ZGEMMKERNEL = zgemm_kernel_4x4.S
87-
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
88-
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
102+
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
103+
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
104+
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
105+
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
106+
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
107+
ZGEMMINCOPYOBJ = zgemm_incopy.o
108+
ZGEMMITCOPYOBJ = zgemm_itcopy.o
109+
endif
110+
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
111+
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
89112
ZGEMMONCOPYOBJ = zgemm_oncopy.o
90113
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
91114

0 commit comments

Comments (0)