Skip to content

Commit 4806715

Browse files
committed
Fixed #456. Merged the optimizations for APM's
xgene-1 (aarch64). Merge branch 'benedikt-huber-dave-patch' into develop
2 parents 2987bc7 + 58c90d5 commit 4806715

8 files changed

Lines changed: 2442 additions & 16 deletions

File tree

CONTRIBUTORS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,5 +117,9 @@ In chronological order:
117117
* Isaac Dunham <https://github.com/idunham>
118118
* [2014-08-03] Fixed link error on Linux/musl
119119

120+
* Dave Nuechterlein
121+
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
122+
ARMv8 support.
123+
120124
* [Your name or handle] <[email or website]>
121125
* [Date] [Brief summary of your changes]

common_arm64.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,9 @@ static inline int blas_quickdivide(blasint x, blasint y){
119119
}
120120

121121
#if defined(DOUBLE)
122-
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
122+
#define GET_IMAGE(res) __asm__ __volatile__("str d1, %0" : "=m"(res) : : "memory")
123123
#else
124-
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
124+
#define GET_IMAGE(res) __asm__ __volatile__("str s1, %0" : "=m"(res) : : "memory")
125125
#endif
126126

127127
#define GET_IMAGE_CANCEL
@@ -138,7 +138,6 @@ static inline int blas_quickdivide(blasint x, blasint y){
138138
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
139139

140140
#define PROLOGUE \
141-
.arm ;\
142141
.global REALNAME ;\
143142
.func REALNAME ;\
144143
REALNAME:

cpuid_arm64.c

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
/**************************************************************************
2+
Copyright (c) 2013, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#include <stdio.h>
#include <string.h>
29+
30+
#define CPU_UNKNOWN 0
31+
#define CPU_ARMV8 1
32+
33+
/* Core names indexed by the CPU_* identifiers (CPU_UNKNOWN = 0, CPU_ARMV8 = 1). */
static char *cpuname[] = {
	"UNKNOWN",
	"ARMV8"
};
37+
38+
39+
/*
 * Report whether the "Features" line of /proc/cpuinfo lists `search`.
 *
 * Returns 1 when `search` is exactly equal to one of the whitespace-separated
 * tokens that follow the ':' on the first line starting with "Features".
 * Returns 0 in every other case: not built for Linux, `search` is NULL,
 * /proc/cpuinfo cannot be opened, there is no "Features" line, or the line
 * has no ':'.
 */
int get_feature(char *search)
{

#if defined(linux) || defined(__linux__)
	FILE *infile;
	char buffer[2048], *p, *t;

	if (search == NULL) return(0);

	infile = fopen("/proc/cpuinfo", "r");
	if (infile == NULL) return(0);

	p = NULL;
	while (fgets(buffer, sizeof(buffer), infile))
	{
		if (!strncmp("Features", buffer, 8))
		{
			/* The value starts after the ':'; a line without one yields no tokens. */
			p = strchr(buffer, ':');
			if (p != NULL) p++;
			break;
		}
	}

	fclose(infile);

	if (p == NULL) return(0);

	/*
	 * fgets keeps the trailing newline, so it has to be a delimiter as well,
	 * otherwise the last feature would never compare equal. The first token
	 * is tested too.
	 */
	for (t = strtok(p, " \t\r\n"); t != NULL; t = strtok(NULL, " \t\r\n"))
	{
		if (!strcmp(t, search)) return(1);
	}

#endif
	return(0);
}
73+
74+
75+
int detect(void)
76+
{
77+
78+
#ifdef linux
79+
80+
FILE *infile;
81+
char buffer[512], *p;
82+
p = (char *) NULL ;
83+
84+
infile = fopen("/proc/cpuinfo", "r");
85+
86+
while (fgets(buffer, sizeof(buffer), infile))
87+
{
88+
89+
if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9)))
90+
{
91+
p = strchr(buffer, ':') + 2;
92+
break;
93+
}
94+
}
95+
96+
fclose(infile);
97+
98+
if(p != NULL)
99+
{
100+
101+
if (strstr(p, "AArch64"))
102+
{
103+
return CPU_ARMV8;
104+
105+
}
106+
107+
108+
}
109+
#endif
110+
111+
return CPU_UNKNOWN;
112+
}
113+
114+
char *get_corename(void)
115+
{
116+
return cpuname[detect()];
117+
}
118+
119+
/* Write the architecture name "ARM" to stdout, without a trailing newline. */
void get_architecture(void)
{
	fputs("ARM", stdout);
}
123+
124+
void get_subarchitecture(void)
125+
{
126+
int d = detect();
127+
switch (d)
128+
{
129+
130+
case CPU_ARMV8:
131+
printf("ARMV8");
132+
break;
133+
134+
default:
135+
printf("UNKNOWN");
136+
break;
137+
}
138+
}
139+
140+
/* Write the sub-directory name "arm64" to stdout, without a trailing newline. */
void get_subdirname(void)
{
	fputs("arm64", stdout);
}
144+
145+
void get_cpuconfig(void)
146+
{
147+
148+
int d = detect();
149+
switch (d)
150+
{
151+
152+
case CPU_ARMV8:
153+
printf("#define ARMV8\n");
154+
printf("#define L1_DATA_SIZE 32768\n");
155+
printf("#define L1_DATA_LINESIZE 64\n");
156+
printf("#define L2_SIZE 262144\n");
157+
printf("#define L2_LINESIZE 64\n");
158+
printf("#define DTB_DEFAULT_ENTRIES 64\n");
159+
printf("#define DTB_SIZE 4096\n");
160+
printf("#define L2_ASSOCIATIVE 4\n");
161+
break;
162+
163+
164+
}
165+
}
166+
167+
168+
void get_libname(void)
169+
{
170+
171+
int d = detect();
172+
switch (d)
173+
{
174+
175+
case CPU_ARMV8:
176+
printf("armv8\n");
177+
break;
178+
179+
}
180+
}
181+
182+
183+
/*
 * Walk the tokens of the "Features" line of /proc/cpuinfo.
 *
 * The token loop body is empty, so nothing is printed for any feature.
 * The function returns silently when not built for Linux, when /proc/cpuinfo
 * cannot be opened, or when no "Features" line with a ':' is found.
 */
void get_features(void)
{

#if defined(linux) || defined(__linux__)
	FILE *infile;
	char buffer[2048], *p, *t;

	infile = fopen("/proc/cpuinfo", "r");
	if (infile == NULL) return;

	p = NULL;
	while (fgets(buffer, sizeof(buffer), infile))
	{
		if (!strncmp("Features", buffer, 8))
		{
			/* The value starts after the ':'; a line without one yields no tokens. */
			p = strchr(buffer, ':');
			if (p != NULL) p++;
			break;
		}
	}

	fclose(infile);

	if (p == NULL) return;

	/* The newline kept by fgets is a delimiter so it never sticks to a token. */
	for (t = strtok(p, " \t\r\n"); t != NULL; t = strtok(NULL, " \t\r\n"))
	{
		/* No feature is mapped to an output here. */
	}

#endif
	return;
}
216+
217+

getarch.c

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -746,12 +746,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
746746
#define SUBARCHITECTURE "ARMV8"
747747
#define SUBDIRNAME "arm64"
748748
#define ARCHCONFIG "-DARMV8 " \
749-
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
750-
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
751-
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
752-
"-DHAVE_VFP -DHAVE_VFPV3 -DHAVE_VFPV4"
749+
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
750+
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
751+
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 "
753752
#define LIBNAME "armv8"
754-
#define CORENAME "ARMV8"
753+
#define CORENAME "XGENE1"
755754
#else
756755
#endif
757756

@@ -801,6 +800,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
801800
#define OPENBLAS_SUPPORTED
802801
#endif
803802

803+
#ifdef __aarch64__
804+
#include "cpuid_arm64.c"
805+
#define OPENBLAS_SUPPORTED
806+
#endif
807+
804808

805809
#ifndef OPENBLAS_SUPPORTED
806810
#error "This arch/CPU is not supported by OpenBLAS."
@@ -856,7 +860,7 @@ int main(int argc, char *argv[]){
856860
#ifdef FORCE
857861
printf("CORE=%s\n", CORENAME);
858862
#else
859-
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__)
863+
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
860864
printf("CORE=%s\n", get_corename());
861865
#endif
862866
#endif
@@ -956,7 +960,7 @@ int main(int argc, char *argv[]){
956960
#ifdef FORCE
957961
printf("#define CHAR_CORENAME \"%s\"\n", CORENAME);
958962
#else
959-
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__)
963+
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
960964
printf("#define CHAR_CORENAME \"%s\"\n", get_corename());
961965
#endif
962966
#endif

kernel/arm64/KERNEL.ARMV8

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,14 @@ DGEMVTKERNEL = ../arm/gemv_t.c
8080
CGEMVTKERNEL = ../arm/zgemv_t.c
8181
ZGEMVTKERNEL = ../arm/zgemv_t.c
8282

83-
STRMMKERNEL = ../generic/trmmkernel_2x2.c
83+
STRMMKERNEL = ../generic/trmmkernel_4x4.c
8484
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
8585
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
8686
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
8787

88-
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
89-
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
90-
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
88+
SGEMMKERNEL = sgemm_kernel_4x4.S
89+
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
90+
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
9191
SGEMMONCOPYOBJ = sgemm_oncopy.o
9292
SGEMMOTCOPYOBJ = sgemm_otcopy.o
9393

0 commit comments

Comments
 (0)