Skip to content

Commit f5f50b3

Browse files
committed
added benchmarks for lapack potrf, potrs and potri functions
1 parent 651dd22 commit f5f50b3

2 files changed

Lines changed: 353 additions & 0 deletions

File tree

benchmark/Makefile

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
4242
sgemv.goto dgemv.goto cgemv.goto zgemv.goto \
4343
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \
4444
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
45+
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto \
4546
ssymm.goto dsymm.goto csymm.goto zsymm.goto
4647

4748
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
@@ -59,6 +60,7 @@ acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
5960
sgemv.acml dgemv.acml cgemv.acml zgemv.acml \
6061
sgeev.acml dgeev.acml cgeev.acml zgeev.acml \
6162
sgetri.acml dgetri.acml cgetri.acml zgetri.acml \
63+
spotrf.acml dpotrf.acml cpotrf.acml zpotrf.acml \
6264
ssymm.acml dsymm.acml csymm.acml zsymm.acml
6365

6466
atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
@@ -77,6 +79,7 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
7779
sgemv.atlas dgemv.atlas cgemv.atlas zgemv.atlas \
7880
sgeev.atlas dgeev.atlas cgeev.atlas zgeev.atlas \
7981
sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \
82+
spotrf.atlas dpotrf.atlas cpotrf.atlas zpotrf.atlas \
8083
ssymm.atlas dsymm.atlas csymm.atlas zsymm.atlas
8184

8285
mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
@@ -94,6 +97,7 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
9497
sgemv.mkl dgemv.mkl cgemv.mkl zgemv.mkl \
9598
sgeev.mkl dgeev.mkl cgeev.mkl zgeev.mkl \
9699
sgetri.mkl dgetri.mkl cgetri.mkl zgetri.mkl \
100+
spotrf.mkl dpotrf.mkl cpotrf.mkl zpotrf.mkl \
97101
ssymm.mkl dsymm.mkl csymm.mkl zsymm.mkl
98102

99103
all :: goto atlas acml mkl
@@ -838,6 +842,60 @@ zgetri.mkl : zgetri.$(SUFFIX)
838842
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
839843

840844

845+
##################################### Spotrf ####################################################
# Link the spotrf benchmark object once per library flavour (goto, acml, atlas, mkl).
# Only the .goto target depends on ../$(LIBNAME) and has a recipe without the '-' prefix;
# the acml/atlas/mkl recipes start with '-', so make carries on if one of those links fails.
846+
spotrf.goto : spotrf.$(SUFFIX) ../$(LIBNAME)
847+
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm
848+
849+
spotrf.acml : spotrf.$(SUFFIX)
850+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
851+
852+
spotrf.atlas : spotrf.$(SUFFIX)
853+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
854+
855+
spotrf.mkl : spotrf.$(SUFFIX)
856+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
857+
858+
##################################### Dpotrf ####################################################
# Link the dpotrf benchmark object once per library flavour (goto, acml, atlas, mkl).
# Only the .goto target depends on ../$(LIBNAME) and has a recipe without the '-' prefix;
# the acml/atlas/mkl recipes start with '-', so make carries on if one of those links fails.
859+
dpotrf.goto : dpotrf.$(SUFFIX) ../$(LIBNAME)
860+
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm
861+
862+
dpotrf.acml : dpotrf.$(SUFFIX)
863+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
864+
865+
dpotrf.atlas : dpotrf.$(SUFFIX)
866+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
867+
868+
dpotrf.mkl : dpotrf.$(SUFFIX)
869+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
870+
871+
##################################### Cpotrf ####################################################
# Link the cpotrf benchmark object once per library flavour (goto, acml, atlas, mkl).
# Only the .goto target depends on ../$(LIBNAME) and has a recipe without the '-' prefix;
# the acml/atlas/mkl recipes start with '-', so make carries on if one of those links fails.
872+
873+
cpotrf.goto : cpotrf.$(SUFFIX) ../$(LIBNAME)
874+
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm
875+
876+
cpotrf.acml : cpotrf.$(SUFFIX)
877+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
878+
879+
cpotrf.atlas : cpotrf.$(SUFFIX)
880+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
881+
882+
cpotrf.mkl : cpotrf.$(SUFFIX)
883+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
884+
885+
##################################### Zpotrf ####################################################
# Link the zpotrf benchmark object once per library flavour (goto, acml, atlas, mkl).
# Only the .goto target depends on ../$(LIBNAME) and has a recipe without the '-' prefix;
# the acml/atlas/mkl recipes start with '-', so make carries on if one of those links fails.
886+
887+
zpotrf.goto : zpotrf.$(SUFFIX) ../$(LIBNAME)
888+
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm
889+
890+
zpotrf.acml : zpotrf.$(SUFFIX)
891+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
892+
893+
zpotrf.atlas : zpotrf.$(SUFFIX)
894+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
895+
896+
zpotrf.mkl : zpotrf.$(SUFFIX)
897+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
898+
841899

842900
###################################################################################################
843901

@@ -1003,6 +1061,19 @@ cgetri.$(SUFFIX) : getri.c
10031061
zgetri.$(SUFFIX) : getri.c
10041062
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
10051063

1064+
# potrf.c is compiled four times into s/d/c/z objects. The -D/-U settings of COMPLEX
# and DOUBLE drive the #ifndef COMPLEX / defined(DOUBLE) tests in potrf.c that pick
# which ?potrf/?potrs/?potri routines (and ?syrk or ?herk) the object calls:
#   s: -UCOMPLEX -UDOUBLE   d: -UCOMPLEX -DDOUBLE   c: -DCOMPLEX -UDOUBLE   z: -DCOMPLEX -DDOUBLE
spotrf.$(SUFFIX) : potrf.c
1065+
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
1066+
1067+
dpotrf.$(SUFFIX) : potrf.c
1068+
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
1069+
1070+
cpotrf.$(SUFFIX) : potrf.c
1071+
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
1072+
1073+
zpotrf.$(SUFFIX) : potrf.c
1074+
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
1075+
1076+
10061077

10071078

10081079

benchmark/potrf.c

Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
/*********************************************************************/
2+
/* Copyright 2009, 2010 The University of Texas at Austin. */
3+
/* All rights reserved. */
4+
/* */
5+
/* Redistribution and use in source and binary forms, with or */
6+
/* without modification, are permitted provided that the following */
7+
/* conditions are met: */
8+
/* */
9+
/* 1. Redistributions of source code must retain the above */
10+
/* copyright notice, this list of conditions and the following */
11+
/* disclaimer. */
12+
/* */
13+
/* 2. Redistributions in binary form must reproduce the above */
14+
/* copyright notice, this list of conditions and the following */
15+
/* disclaimer in the documentation and/or other materials */
16+
/* provided with the distribution. */
17+
/* */
18+
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19+
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20+
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21+
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22+
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23+
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24+
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25+
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26+
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27+
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28+
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29+
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30+
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31+
/* POSSIBILITY OF SUCH DAMAGE. */
32+
/* */
33+
/* The views and conclusions contained in the software and */
34+
/* documentation are those of the authors and should not be */
35+
/* interpreted as representing official policies, either expressed */
36+
/* or implied, of The University of Texas at Austin. */
37+
/*********************************************************************/
38+
39+
#include <stdio.h>
40+
#include <stdlib.h>
41+
#ifdef __CYGWIN32__
42+
#include <sys/time.h>
43+
#endif
44+
#include "common.h"
45+
46+
/* NOTE(review): fabs is declared by hand here (instead of via <math.h>) and is
   not called anywhere in this file. */
double fabs(double);
47+
48+
/* Map the generic names POTRF / POTRS / POTRI / SYRK onto the routine for the
   precision being built. COMPLEX selects the real or complex family; XDOUBLE,
   then DOUBLE, selects the precision, with single precision as the fallback.
   For complex builds SYRK is mapped to the ?herk routine rather than ?syrk.
   NOTE(review): only POTRF is #undef'd first; POTRS, POTRI and SYRK are not. */
#undef POTRF
49+
50+
#ifndef COMPLEX
51+
#ifdef XDOUBLE
52+
#define POTRF BLASFUNC(qpotrf)
53+
#define POTRS BLASFUNC(qpotrs)
54+
#define POTRI BLASFUNC(qpotri)
55+
#define SYRK BLASFUNC(qsyrk)
56+
#elif defined(DOUBLE)
57+
#define POTRF BLASFUNC(dpotrf)
58+
#define POTRS BLASFUNC(dpotrs)
59+
#define POTRI BLASFUNC(dpotri)
60+
#define SYRK BLASFUNC(dsyrk)
61+
#else
62+
#define POTRF BLASFUNC(spotrf)
63+
#define POTRS BLASFUNC(spotrs)
64+
#define POTRI BLASFUNC(spotri)
65+
#define SYRK BLASFUNC(ssyrk)
66+
#endif
67+
#else
68+
#ifdef XDOUBLE
69+
#define POTRF BLASFUNC(xpotrf)
70+
#define POTRS BLASFUNC(xpotrs)
71+
#define POTRI BLASFUNC(xpotri)
72+
#define SYRK BLASFUNC(xherk)
73+
#elif defined(DOUBLE)
74+
#define POTRF BLASFUNC(zpotrf)
75+
#define POTRS BLASFUNC(zpotrs)
76+
#define POTRI BLASFUNC(zpotri)
77+
#define SYRK BLASFUNC(zherk)
78+
#else
79+
#define POTRF BLASFUNC(cpotrf)
80+
#define POTRS BLASFUNC(cpotrs)
81+
#define POTRI BLASFUNC(cpotri)
82+
#define SYRK BLASFUNC(cherk)
83+
#endif
84+
#endif
85+
86+
// extern void POTRI(char *uplo, blasint *m, FLOAT *a, blasint *lda, blasint *info);
87+
// extern void POTRS(char *uplo, blasint *m, blasint *n, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb, blasint *info);
88+
89+
#if defined(__WIN32__) || defined(__WIN64__)
90+
91+
/*
 * Replacement for gettimeofday(), compiled only when __WIN32__ or __WIN64__
 * is defined. When tv is non-NULL it is filled from GetSystemTimeAsFileTime();
 * when tv is NULL nothing is written. The tz argument is never used.
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){
92+
93+
FILETIME ft;
94+
unsigned __int64 tmpres = 0;
95+
/* NOTE(review): tzflag is never read or written in this function. */
static int tzflag;
96+
97+
if (NULL != tv)
98+
{
99+
GetSystemTimeAsFileTime(&ft);
100+
101+
/* Combine the high and low halves of ft into one 64-bit value. */
tmpres |= ft.dwHighDateTime;
102+
tmpres <<= 32;
103+
tmpres |= ft.dwLowDateTime;
104+
105+
/*converting file time to unix epoch*/
106+
tmpres /= 10; /*convert into microseconds*/
107+
/* NOTE(review): DELTA_EPOCH_IN_MICROSECS is not defined in this file;
   presumably it comes from common.h -- confirm. */
tmpres -= DELTA_EPOCH_IN_MICROSECS;
108+
/* Split the microsecond count into whole seconds and the remainder. */
tv->tv_sec = (long)(tmpres / 1000000UL);
109+
tv->tv_usec = (long)(tmpres % 1000000UL);
110+
}
111+
112+
return 0;
113+
}
114+
115+
#endif
116+
117+
/*
 * Benchmark driver. For every matrix order m from `from` to `to` in increments
 * of `step` it fills a with a triangular matrix, builds b from a with SYRK,
 * and times POTRF on b. If OPENBLAS_TEST starts with 'S' it then times POTRS,
 * if it starts with 'I' it then times POTRI; the later timing replaces the
 * POTRF one. One result line per m is written to stderr.
 *
 * Command line: [from [to [step]]]; defaults are 1, 200 and 1.
 * Environment:  OPENBLAS_UPLO beginning with 'L' selects "L" (default "U");
 *               OPENBLAS_TEST supplies the test letter (default 'F').
 * Returns 0. Calls exit(1) if an allocation fails or a routine reports a
 * non-zero info.
 */
int MAIN__(int argc, char *argv[]){
118+
119+
#ifndef COMPLEX
120+
char *trans[] = {"T", "N"};
121+
#else
122+
char *trans[] = {"C", "N"};
123+
#endif
124+
/* uplo[] and trans[] are both indexed by uplos, so "U" pairs with "T"/"C"
   and "L" pairs with "N" in the SYRK call below. */
char *uplo[] = {"U", "L"};
125+
/* Scaling factors handed to SYRK. */
FLOAT alpha[] = {1.0, 0.0};
126+
FLOAT beta [] = {0.0, 0.0};
127+
128+
FLOAT *a, *b;
129+
130+
char *p;
131+
char btest = 'F';
132+
133+
blasint m, i, j, info, uplos=0;
134+
double flops;
135+
136+
int from = 1;
137+
int to = 200;
138+
int step = 1;
139+
140+
struct timeval start, stop;
141+
double time1;
142+
143+
/* Skip the program name. */
argc--;argv++;
144+
145+
if (argc > 0) { from = atol(*argv); argc--; argv++;}
146+
/* to is clamped so that it is never smaller than from. */
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
147+
/* NOTE(review): step is not validated; with a zero or negative step the
   loop variable m never moves past to. */
if (argc > 0) { step = atol(*argv); argc--; argv++;}
148+
149+
if ((p = getenv("OPENBLAS_UPLO")))
150+
if (*p == 'L') uplos=1;
151+
152+
/* Only the first character of OPENBLAS_TEST is kept. */
if ((p = getenv("OPENBLAS_TEST"))) btest=*p;
153+
154+
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c\n", from, to, step,*uplo[uplos]);
155+
156+
/* Both buffers are sized once for the largest order (to x to elements,
   times COMPSIZE) and reused for every m.
   NOTE(review): neither buffer is freed before this function returns. */
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
157+
fprintf(stderr,"Out of Memory!!\n");exit(1);
158+
}
159+
160+
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
161+
fprintf(stderr,"Out of Memory!!\n");exit(1);
162+
}
163+
164+
for(m = from; m <= to; m += step){
165+
166+
/* Fill a (column-major indexing, leading dimension m) with a triangular
   matrix: diagonal entries are a random value in [0,1] plus 8, entries on
   the stored side are random values in [-0.5,0.5], and the other side is
   zero. uplos == 1 populates below the diagonal, otherwise above it. */
#ifndef COMPLEX
167+
if (uplos & 1) {
168+
for (j = 0; j < m; j++) {
169+
for(i = 0; i < j; i++) a[i + j * m] = 0.;
170+
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
171+
for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
172+
}
173+
} else {
174+
for (j = 0; j < m; j++) {
175+
for(i = 0; i < j; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
176+
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
177+
for(i = j + 1; i < m; i++) a[i + j * m] = 0.;
178+
}
179+
}
180+
#else
181+
/* Complex build: each element occupies two consecutive FLOATs (index*2+0
   and index*2+1). The second component of every diagonal element is 0. */
if (uplos & 1) {
182+
for (j = 0; j < m; j++) {
183+
for(i = 0; i < j; i++) {
184+
a[(i + j * m) * 2 + 0] = 0.;
185+
a[(i + j * m) * 2 + 1] = 0.;
186+
}
187+
188+
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
189+
a[(j + j * m) * 2 + 1] = 0.;
190+
191+
for(i = j + 1; i < m; i++) {
192+
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
193+
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
194+
}
195+
}
196+
} else {
197+
for (j = 0; j < m; j++) {
198+
for(i = 0; i < j; i++) {
199+
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
200+
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
201+
}
202+
203+
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
204+
a[(j + j * m) * 2 + 1] = 0.;
205+
206+
for(i = j + 1; i < m; i++) {
207+
a[(i + j * m) * 2 + 0] = 0.;
208+
a[(i + j * m) * 2 + 1] = 0.;
209+
}
210+
}
211+
}
212+
#endif
213+
214+
/* Build the POTRF input b from the triangular a (alpha = 1, beta = 0).
   Presumably this is done so that b is positive definite -- confirm.
   This call is outside the timed region. */
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
215+
216+
gettimeofday( &start, (struct timezone *)0);
217+
218+
/* Timed call: factor b in place. This always runs, whatever btest is. */
POTRF(uplo[uplos], &m, b, &m, &info);
219+
220+
gettimeofday( &stop, (struct timezone *)0);
221+
222+
/* NOTE(review): info is a blasint printed with %d here and in the other
   info messages below; confirm blasint is int in the intended builds. */
if (info != 0) {
223+
fprintf(stderr, "Potrf info = %d\n", info);
224+
exit(1);
225+
}
226+
227+
/* Elapsed wall-clock time in seconds, then an operation-count estimate
   scaled by COMPSIZE*COMPSIZE and by 1.e-6 to give MFlops. */
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
228+
flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6;
229+
230+
/* Optional solve benchmark: overwrites time1 and flops from POTRF. */
if ( btest == 'S' )
231+
{
232+
233+
/* Refill the whole a buffer (to x to elements, not just m x m) with
   random values in [-0.5,0.5]; it is passed to POTRS as the second matrix. */
for(j = 0; j < to; j++){
234+
for(i = 0; i < to * COMPSIZE; i++){
235+
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
236+
}
237+
}
238+
239+
gettimeofday( &start, (struct timezone *)0);
240+
241+
/* Timed call: b holds the POTRF result; a is the m-column block passed
   as the other matrix, both with leading dimension m. */
POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info);
242+
243+
gettimeofday( &stop, (struct timezone *)0);
244+
245+
if (info != 0) {
246+
fprintf(stderr, "Potrs info = %d\n", info);
247+
exit(1);
248+
}
249+
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
250+
flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6;
251+
252+
}
253+
254+
/* Optional POTRI benchmark on the factored b: overwrites time1 and flops
   from POTRF. */
if ( btest == 'I' )
255+
{
256+
257+
gettimeofday( &start, (struct timezone *)0);
258+
259+
POTRI(uplo[uplos], &m, b, &m, &info);
260+
261+
gettimeofday( &stop, (struct timezone *)0);
262+
263+
if (info != 0) {
264+
fprintf(stderr, "Potri info = %d\n", info);
265+
exit(1);
266+
}
267+
268+
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
269+
flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6;
270+
}
271+
272+
/* Report whichever measurement was taken last for this m.
   NOTE(review): m is a blasint printed with %8d -- confirm blasint is int. */
fprintf(stderr, "%8d : %10.2f MFlops : %10.3f Sec : Test=%c\n",m,flops ,time1,btest);
273+
274+
275+
}
276+
277+
278+
return 0;
279+
}
280+
281+
/* Expose MAIN__ as the program entry point through a weak alias. The alias is
   declared with an int return type so that it matches the definition of
   MAIN__ (which returns int) and the standard signature of main. */
int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
282+

0 commit comments

Comments
 (0)