Skip to content

Commit fced574

Browse files
committed
Merge branch 'release-0.2.16'
2 parents 53e849f + 8c0fb12 commit fced574

3,456 files changed

Lines changed: 203651 additions & 30385 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,4 @@ test/zblat2
6868
test/zblat3
6969
build
7070
build.*
71+
*.swp

.travis.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,12 @@ before_install:
2424
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
2525
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi
2626

27-
script: make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
27+
script:
28+
- set -e
29+
- make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
30+
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C test DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
31+
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C ctest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
32+
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C utest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
2833

2934
# whitelist
3035
branches:

CMakeLists.txt

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.4)
66
project(OpenBLAS)
77
set(OpenBLAS_MAJOR_VERSION 0)
88
set(OpenBLAS_MINOR_VERSION 2)
9-
set(OpenBLAS_PATCH_VERSION 14)
9+
set(OpenBLAS_PATCH_VERSION 16)
1010
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1111

1212
enable_language(ASM)
@@ -54,10 +54,6 @@ if (NOT DYNAMIC_ARCH)
5454
list(APPEND BLASDIRS kernel)
5555
endif ()
5656

57-
if (DEFINED UTEST_CHECK)
58-
set(SANITY_CHECK 1)
59-
endif ()
60-
6157
if (DEFINED SANITY_CHECK)
6258
list(APPEND BLASDIRS reference)
6359
endif ()
@@ -110,6 +106,10 @@ if (${NO_STATIC} AND ${NO_SHARED})
110106
message(FATAL_ERROR "Neither static nor shared are enabled.")
111107
endif ()
112108

109+
#Set default output directory
110+
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib )
111+
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib )
112+
113113
# get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
114114
set(TARGET_OBJS "")
115115
foreach (SUBDIR ${SUBDIRS})
@@ -139,6 +139,17 @@ add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET
139139

140140
include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")
141141

142+
# Set output for libopenblas
143+
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
144+
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
145+
string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG )
146+
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
147+
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
148+
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
149+
endforeach()
150+
151+
enable_testing()
152+
add_subdirectory(utest)
142153

143154
if(NOT MSVC)
144155
#only build shared library for MSVC
@@ -152,7 +163,6 @@ target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
152163
endif()
153164

154165
#build test and ctest
155-
enable_testing()
156166
add_subdirectory(test)
157167
if(NOT NO_CBLAS)
158168
add_subdirectory(ctest)

CONTRIBUTORS.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,17 @@ In chronological order:
121121
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
122122
ARMv8 support.
123123

124+
* Jerome Robert <jeromerobert@gmx.com>
125+
* [2015-01-01] Speed-up small `ger` and `gemv` using stack allocation (bug #478)
126+
* [2015-12-23] `stack_check` in `gemv.c` (bug #722)
127+
* [2015-12-28] Allow forcing the number of parallel make jobs
128+
* [2015-12-28] Fix detection of AMD E2-3200
129+
* [2015-12-31] Let `make MAX_STACK_ALLOC=0` do what is expected
130+
* [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731)
131+
* [2016-01-24] Use `GEMM_MULTITHREAD_THRESHOLD` as a number of ops (bug #742)
132+
* [2016-01-26] Let `openblas_get_num_threads` return the number of active threads (bug #760)
133+
* [2016-01-30] Speed-up small `zger`, `zgemv`, `ztrmv` using stack allocation (bug #727)
134+
124135
* Dan Kortschak
125136
* [2015-01-07] Added test for drotmg bug #484.
126137

@@ -130,5 +141,11 @@ In chronological order:
130141
* Martin Koehler <https://github.com/grisuthedragon/>
131142
* [2015-09-07] Improved imatcopy
132143

144+
* Ashwin Sekhar T K <https://github.com/ashwinyes/>
145+
* [2015-11-09] Assembly kernels for Cortex-A57 (ARMv8)
146+
* [2015-11-20] lapack-test fixes for Cortex-A57
147+
* [2016-03-14] Additional functional Assembly Kernels for Cortex-A57
148+
* [2016-03-14] Optimize Dgemm 4x4 for Cortex-A57
149+
133150
* [Your name or handle] <[email or website]>
134151
* [Date] [Brief summary of your changes]

Changelog.txt

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,57 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.2.16
4+
15-Mar-2016
5+
common:
6+
* Avoid potential getenv segfault. (#716)
7+
* Import LAPACK svn bugfix #142-#147,#150-#155
8+
9+
x86/x86_64:
10+
* Optimize c/zgemv for AMD Bulldozer, Piledriver, Steamroller
11+
* Fix bug with scipy linalg test.
12+
13+
ARM:
14+
* Improve DGEMM for ARM Cortex-A57. (Thanks, Ashwin Sekhar T K)
15+
16+
POWER:
17+
* Optimize D and Z BLAS3 functions for Power8.
18+
19+
====================================================================
20+
Version 0.2.16.rc1
21+
23-Feb-2016
22+
common:
23+
* Upgrade LAPACK to 3.6.0 version.
24+
Add BUILD_LAPACK_DEPRECATED option in Makefile.rule to build
25+
LAPACK deprecated functions.
26+
* Add MAKE_NB_JOBS option in Makefile.
27+
Force number of make jobs. This is particularly
28+
useful when using distcc. (#735. Thanks, Jerome Robert.)
29+
* Redesign unit test. Run unit/regression test at every build (Travis-CI and Appveyor).
30+
* Disable multi-threading for small size swap and ger. (#744. Thanks, Jerome Robert)
31+
* Improve small zger, zgemv, ztrmv using stack allocation (#727. Thanks, Jerome Robert)
32+
* Let openblas_get_num_threads return the number of active threads.
33+
(#760. Thanks, Jerome Robert)
34+
* Support illumos(OmniOS). (#749. Thanks, Lauri Tirkkonen)
35+
* Fix LAPACK Dormbr, Dormlq bug. (#711, #713. Thanks, Brendan Tracey)
36+
* Update scipy benchmark script. (#745. Thanks, John Kirkham)
37+
38+
x86/x86_64:
39+
* Optimize trsm kernels for AMD Bulldozer, Piledriver, Steamroller.
40+
* Detect Intel Avoton.
41+
* Detect AMD Trinity, Richland, E2-3200.
42+
* Fix gemv performance bug on Mac OSX Intel Haswell.
43+
* Fix some bugs with CMake and Visual Studio
44+
45+
ARM:
46+
* Support and optimize Cortex-A57 AArch64.
47+
(#686. Thanks, Ashwin Sekhar TK)
48+
* Fix Android build on ARMV7 (#778. Thanks, Paul Mustiere)
49+
* Update ARMV6 kernels.
50+
51+
POWER:
52+
* Fix detection of POWER architecture
53+
(#684. Thanks, Sebastien Villemot)
54+
255
====================================================================
356
Version 0.2.15
457
27-Oct-2015

Makefile

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,6 @@ ifneq ($(DYNAMIC_ARCH), 1)
77
BLASDIRS += kernel
88
endif
99

10-
ifdef UTEST_CHECK
11-
SANITY_CHECK = 1
12-
endif
13-
1410
ifdef SANITY_CHECK
1511
BLASDIRS += reference
1612
endif
@@ -85,22 +81,22 @@ endif
8581

8682
shared :
8783
ifndef NO_SHARED
88-
ifeq ($(OSNAME), Linux)
84+
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS))
8985
@$(MAKE) -C exports so
90-
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
91-
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
86+
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
87+
@ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
9288
endif
9389
ifeq ($(OSNAME), FreeBSD)
9490
@$(MAKE) -C exports so
95-
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
91+
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
9692
endif
9793
ifeq ($(OSNAME), NetBSD)
9894
@$(MAKE) -C exports so
99-
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
95+
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
10096
endif
10197
ifeq ($(OSNAME), Darwin)
10298
@$(MAKE) -C exports dyn
103-
@-ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
99+
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
104100
endif
105101
ifeq ($(OSNAME), WINNT)
106102
@$(MAKE) -C exports dll
@@ -117,10 +113,8 @@ ifndef CROSS
117113
touch $(LIBNAME)
118114
ifndef NO_FBLAS
119115
$(MAKE) -C test all
120-
ifdef UTEST_CHECK
121116
$(MAKE) -C utest all
122117
endif
123-
endif
124118
ifndef NO_CBLAS
125119
$(MAKE) -C ctest all
126120
endif
@@ -249,16 +243,23 @@ ifndef NOFORTRAN
249243
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
250244
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
251245
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
252-
ifeq ($(FC), gfortran)
246+
ifeq ($(F_COMPILER), GFORTRAN)
253247
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
254248
ifdef SMP
249+
ifeq ($(OSNAME), WINNT)
250+
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
251+
else
255252
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
253+
endif
256254
else
257255
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
258256
endif
259257
else
260258
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
261259
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
260+
endif
261+
ifeq ($(BUILD_LAPACK_DEPRECATED), 1)
262+
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
262263
endif
263264
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
264265
endif
@@ -288,8 +289,18 @@ endif
288289
lapack-test :
289290
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
290291
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
292+
ifneq ($(CROSS), 1)
293+
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
294+
./testsecond; ./testdsecnd; ./testieee; ./testversion )
295+
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
296+
endif
297+
298+
lapack-runtest:
299+
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
300+
./testsecond; ./testdsecnd; ./testieee; ./testversion )
291301
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
292302

303+
293304
blas-test:
294305
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
295306
make -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing

Makefile.arm

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ endif
1111

1212
ifeq ($(CORE), ARMV7)
1313
ifeq ($(OSNAME), Android)
14-
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
15-
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
14+
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch
15+
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch
1616
else
1717
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
1818
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
@@ -29,5 +29,3 @@ ifeq ($(CORE), ARMV5)
2929
CCOMMON_OPT += -marm -march=armv5
3030
FCOMMON_OPT += -marm -march=armv5
3131
endif
32-
33-

Makefile.arm64

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,8 @@ CCOMMON_OPT += -march=armv8-a
44
FCOMMON_OPT += -march=armv8-a
55
endif
66

7+
ifeq ($(CORE), CORTEXA57)
8+
CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
9+
FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
10+
endif
711

Makefile.install

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ install : lib.grd
2929
#for inc
3030
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
3131
@echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
32-
@awk 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
32+
@$(AWK) 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
3333
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
3434
@cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
3535
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@@ -48,10 +48,10 @@ endif
4848

4949
ifndef NO_LAPACKE
5050
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
51-
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
52-
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
53-
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
54-
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
51+
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
52+
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
53+
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
54+
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
5555
endif
5656

5757
#for install static library
@@ -64,7 +64,7 @@ endif
6464
#for install shared library
6565
ifndef NO_SHARED
6666
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
67-
ifeq ($(OSNAME), Linux)
67+
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS))
6868
@install -pm755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
6969
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
7070
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \

Makefile.rule

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44

55
# This library's version
6-
VERSION = 0.2.15
6+
VERSION = 0.2.16
77

88
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
99
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@@ -79,6 +79,9 @@ VERSION = 0.2.15
7979
# If you don't need LAPACKE (C Interface to LAPACK), please uncomment the line below.
8080
# NO_LAPACKE = 1
8181

82+
# Build LAPACK Deprecated functions since LAPACK 3.6.0
83+
# BUILD_LAPACK_DEPRECATED = 1
84+
8285
# If you want to use legacy threaded Level 3 implementation.
8386
# USE_SIMPLE_THREADED_LEVEL3 = 1
8487

@@ -108,6 +111,10 @@ NO_AFFINITY = 1
108111
# Don't use parallel make.
109112
# NO_PARALLEL_MAKE = 1
110113

114+
# Force number of make jobs. The default is the number of logical CPUs of the host.
115+
# This is particularly useful when using distcc
116+
# MAKE_NB_JOBS = 2
117+
111118
# If you would like to know minute performance report of GotoBLAS.
112119
# FUNCTION_PROFILE = 1
113120

@@ -138,10 +145,6 @@ NO_AFFINITY = 1
138145
# slow (Not implemented yet).
139146
# SANITY_CHECK = 1
140147

141-
# Run testcases in utest/ . When you enable UTEST_CHECK, it would enable
142-
# SANITY_CHECK to compare the result with reference BLAS.
143-
# UTEST_CHECK = 1
144-
145148
# The installation directory.
146149
# PREFIX = /opt/OpenBLAS
147150

@@ -159,10 +162,11 @@ COMMON_PROF = -pg
159162
# Build Debug version
160163
# DEBUG = 1
161164

162-
# Improve GEMV and GER for small matrices by stack allocation.
163-
# For details, https://github.com/xianyi/OpenBLAS/pull/482
165+
# Set maximum stack allocation.
166+
# The default value is 2048. 0 disables stack allocation and may reduce GER and GEMV
167+
# performance. For details, https://github.com/xianyi/OpenBLAS/pull/482
164168
#
165-
MAX_STACK_ALLOC=2048
169+
# MAX_STACK_ALLOC = 0
166170

167171
# Add a prefix or suffix to all exported symbol names in the shared library.
168172
# Avoid conflicts with other BLAS libraries, especially when using

0 commit comments

Comments
 (0)