Skip to content

Commit 1367a64

Browse files
committed
Merge branch 'develop' of github.com:xianyi/OpenBLAS into arm_soft_fp_abi
2 parents ccf41eb + e31948c commit 1367a64

96 files changed

Lines changed: 19307 additions & 777 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,4 @@ test/zblat2
6868
test/zblat3
6969
build
7070
build.*
71+
*.swp

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.4)
66
project(OpenBLAS)
77
set(OpenBLAS_MAJOR_VERSION 0)
88
set(OpenBLAS_MINOR_VERSION 2)
9-
set(OpenBLAS_PATCH_VERSION 14)
9+
set(OpenBLAS_PATCH_VERSION 16.dev)
1010
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1111

1212
enable_language(ASM)

Makefile.arm64

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,8 @@ CCOMMON_OPT += -march=armv8-a
44
FCOMMON_OPT += -march=armv8-a
55
endif
66

7+
ifeq ($(CORE), CORTEXA57)
8+
CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
9+
FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
10+
endif
711

Makefile.rule

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44

55
# This library's version
6-
VERSION = 0.2.15
6+
VERSION = 0.2.16.dev
77

88
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
99
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library

TargetList.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,5 @@ ARMV5
7474

7575
7.ARM 64-bit CPU:
7676
ARMV8
77+
CORTEXA57
78+

benchmark/gemm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ int main(int argc, char *argv[]){
172172
srandom(getpid());
173173
#endif
174174

175-
for(j = 0; j < m; j++){
175+
for(j = 0; j < to; j++){
176176
for(i = 0; i < to * COMPSIZE; i++){
177177
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
178178
b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;

c_check

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ $hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
66
$hostarch = "x86_64" if ($hostarch eq "amd64");
77
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
88
$hostarch = "arm64" if ($hostarch eq "aarch64");
9+
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/);
910

1011
$binary = $ENV{"BINARY"};
1112

common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ extern "C" {
8686
#if !defined(_MSC_VER)
8787
#include <unistd.h>
8888
#endif
89+
#include <time.h>
8990

9091
#ifdef OS_LINUX
9192
#include <malloc.h>

common_arm64.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,10 @@ static inline int blas_quickdivide(blasint x, blasint y){
8989
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
9090

9191
#define PROLOGUE \
92+
.text ;\
93+
.align 4 ;\
9294
.global REALNAME ;\
93-
.func REALNAME ;\
95+
.type REALNAME, %function ;\
9496
REALNAME:
9597

9698
#define EPILOGUE
@@ -107,7 +109,11 @@ static inline int blas_quickdivide(blasint x, blasint y){
107109
#endif
108110
#define HUGE_PAGESIZE ( 4 << 20)
109111

112+
#if defined(CORTEXA57)
113+
#define BUFFER_SIZE (40 << 20)
114+
#else
110115
#define BUFFER_SIZE (16 << 20)
116+
#endif
111117

112118

113119
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)

cpuid_arm64.c

Lines changed: 53 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,19 @@
2929

3030
#define CPU_UNKNOWN 0
3131
#define CPU_ARMV8 1
32+
#define CPU_CORTEXA57 2
3233

3334
static char *cpuname[] = {
34-
"UNKOWN",
35-
"ARMV8"
35+
"UNKNOWN",
36+
"ARMV8" ,
37+
"CORTEXA57"
3638
};
3739

40+
static char *cpuname_lower[] = {
41+
"unknown",
42+
"armv8" ,
43+
"cortexa57"
44+
};
3845

3946
int get_feature(char *search)
4047
{
@@ -53,13 +60,13 @@ int get_feature(char *search)
5360
{
5461
p = strchr(buffer, ':') + 2;
5562
break;
56-
}
57-
}
63+
}
64+
}
5865

59-
fclose(infile);
66+
fclose(infile);
6067

6168

62-
if( p == NULL ) return;
69+
if( p == NULL ) return 0;
6370

6471
t = strtok(p," ");
6572
while( t = strtok(NULL," "))
@@ -82,11 +89,30 @@ int detect(void)
8289
p = (char *) NULL ;
8390

8491
infile = fopen("/proc/cpuinfo", "r");
92+
while (fgets(buffer, sizeof(buffer), infile))
93+
{
8594

95+
if (!strncmp("CPU part", buffer, 8))
96+
{
97+
p = strchr(buffer, ':') + 2;
98+
break;
99+
}
100+
}
101+
102+
fclose(infile);
103+
if(p != NULL) {
104+
if (strstr(p, "0xd07")) {
105+
return CPU_CORTEXA57;
106+
}
107+
}
108+
109+
p = (char *) NULL ;
110+
infile = fopen("/proc/cpuinfo", "r");
86111
while (fgets(buffer, sizeof(buffer), infile))
87112
{
88113

89-
if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9)))
114+
if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9)) ||
115+
(!strncmp("CPU architecture", buffer, 16)))
90116
{
91117
p = strchr(buffer, ':') + 2;
92118
break;
@@ -100,7 +126,7 @@ int detect(void)
100126

101127
if (strstr(p, "AArch64"))
102128
{
103-
return CPU_ARMV8;
129+
return CPU_ARMV8;
104130

105131
}
106132

@@ -118,23 +144,13 @@ char *get_corename(void)
118144

119145
void get_architecture(void)
120146
{
121-
printf("ARM");
147+
printf("ARM64");
122148
}
123149

124150
void get_subarchitecture(void)
125151
{
126152
int d = detect();
127-
switch (d)
128-
{
129-
130-
case CPU_ARMV8:
131-
printf("ARMV8");
132-
break;
133-
134-
default:
135-
printf("UNKNOWN");
136-
break;
137-
}
153+
printf("%s", cpuname[d]);
138154
}
139155

140156
void get_subdirname(void)
@@ -160,26 +176,32 @@ void get_cpuconfig(void)
160176
printf("#define L2_ASSOCIATIVE 4\n");
161177
break;
162178

163-
179+
case CPU_CORTEXA57:
180+
printf("#define CORTEXA57\n");
181+
printf("#define HAVE_VFP\n");
182+
printf("#define HAVE_VFPV3\n");
183+
printf("#define HAVE_NEON\n");
184+
printf("#define HAVE_VFPV4\n");
185+
printf("#define L1_CODE_SIZE 49152\n");
186+
printf("#define L1_CODE_LINESIZE 64\n");
187+
printf("#define L1_CODE_ASSOCIATIVE 3\n");
188+
printf("#define L1_DATA_SIZE 32768\n");
189+
printf("#define L1_DATA_LINESIZE 64\n");
190+
printf("#define L1_DATA_ASSOCIATIVE 2\n");
191+
printf("#define L2_SIZE 2097152\n");
192+
printf("#define L2_LINESIZE 64\n");
193+
printf("#define L2_ASSOCIATIVE 16\n");
194+
break;
164195
}
165196
}
166197

167198

168199
void get_libname(void)
169200
{
170-
171201
int d = detect();
172-
switch (d)
173-
{
174-
175-
case CPU_ARMV8:
176-
printf("armv8\n");
177-
break;
178-
179-
}
202+
printf("%s", cpuname_lower[d]);
180203
}
181204

182-
183205
void get_features(void)
184206
{
185207

0 commit comments

Comments
 (0)