Skip to content

Commit 503dcbf

Browse files
committed
Merge branch 'develop' into arm_soft_fp_abi
2 parents 1367a64 + 4227049 commit 503dcbf

6,787 files changed

Lines changed: 544802 additions & 89019 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,21 @@ lapack-3.4.2.tgz
1414
lapack-netlib/make.inc
1515
lapack-netlib/lapacke/include/lapacke_mangling.h
1616
lapack-netlib/TESTING/testing_results.txt
17+
lapack-netlib/INSTALL/test*
18+
lapack-netlib/TESTING/xeigtstc
19+
lapack-netlib/TESTING/xeigtstd
20+
lapack-netlib/TESTING/xeigtsts
21+
lapack-netlib/TESTING/xeigtstz
22+
lapack-netlib/TESTING/xlintstc
23+
lapack-netlib/TESTING/xlintstd
24+
lapack-netlib/TESTING/xlintstds
25+
lapack-netlib/TESTING/xlintstrfc
26+
lapack-netlib/TESTING/xlintstrfd
27+
lapack-netlib/TESTING/xlintstrfs
28+
lapack-netlib/TESTING/xlintstrfz
29+
lapack-netlib/TESTING/xlintsts
30+
lapack-netlib/TESTING/xlintstz
31+
lapack-netlib/TESTING/xlintstzc
1732
*.so
1833
*.so.*
1934
*.a
@@ -69,3 +84,6 @@ test/zblat3
6984
build
7085
build.*
7186
*.swp
87+
benchmark/*.goto
88+
benchmark/smallscaling
89+

.travis.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,12 @@ before_install:
2424
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
2525
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi
2626

27-
script: make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
27+
script:
28+
- set -e
29+
- make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
30+
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C test DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
31+
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C ctest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
32+
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C utest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
2833

2934
# whitelist
3035
branches:

CMakeLists.txt

Lines changed: 70 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.4)
66
project(OpenBLAS)
77
set(OpenBLAS_MAJOR_VERSION 0)
88
set(OpenBLAS_MINOR_VERSION 2)
9-
set(OpenBLAS_PATCH_VERSION 16.dev)
9+
set(OpenBLAS_PATCH_VERSION 20.dev)
1010
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1111

1212
enable_language(ASM)
@@ -30,10 +30,20 @@ set(NO_LAPACK 1)
3030
set(NO_LAPACKE 1)
3131
endif()
3232

33-
if(BUILD_DEBUG)
34-
set(CMAKE_BUILD_TYPE Debug)
33+
if(CMAKE_CONFIGURATION_TYPES) # multiconfig generator?
34+
set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE)
35+
set(CMAKE_BUILD_TYPE
36+
Debug Debug
37+
Release Release
38+
)
3539
else()
36-
set(CMAKE_BUILD_TYPE Release)
40+
if( NOT CMAKE_BUILD_TYPE )
41+
if(BUILD_DEBUG)
42+
set(CMAKE_BUILD_TYPE Debug)
43+
else()
44+
set(CMAKE_BUILD_TYPE Release)
45+
endif()
46+
endif()
3747
endif()
3848

3949
if(BUILD_WITHOUT_CBLAS)
@@ -45,19 +55,15 @@ endif()
4555

4656
message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.")
4757

48-
include("${CMAKE_SOURCE_DIR}/cmake/utils.cmake")
49-
include("${CMAKE_SOURCE_DIR}/cmake/system.cmake")
58+
include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake")
59+
include("${PROJECT_SOURCE_DIR}/cmake/system.cmake")
5060

5161
set(BLASDIRS interface driver/level2 driver/level3 driver/others)
5262

5363
if (NOT DYNAMIC_ARCH)
5464
list(APPEND BLASDIRS kernel)
5565
endif ()
5666

57-
if (DEFINED UTEST_CHECK)
58-
set(SANITY_CHECK 1)
59-
endif ()
60-
6167
if (DEFINED SANITY_CHECK)
6268
list(APPEND BLASDIRS reference)
6369
endif ()
@@ -110,6 +116,10 @@ if (${NO_STATIC} AND ${NO_SHARED})
110116
message(FATAL_ERROR "Neither static nor shared are enabled.")
111117
endif ()
112118

119+
#Set default output directory
120+
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib )
121+
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib )
122+
113123
# get obj vars into the format that add_library expects: $<TARGET_OBJECTS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
114124
set(TARGET_OBJS "")
115125
foreach (SUBDIR ${SUBDIRS})
@@ -123,9 +133,9 @@ endforeach ()
123133
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
124134
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
125135
if (NOT NOFORTRAN AND NOT NO_LAPACK)
126-
include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake")
136+
include("${PROJECT_SOURCE_DIR}/cmake/lapack.cmake")
127137
if (NOT NO_LAPACKE)
128-
include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake")
138+
include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake")
129139
endif ()
130140
endif ()
131141

@@ -137,22 +147,36 @@ endif()
137147
# add objects to the openblas lib
138148
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
139149

140-
include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")
150+
include("${PROJECT_SOURCE_DIR}/cmake/export.cmake")
151+
152+
# Set output for libopenblas
153+
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
154+
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_NAME_DEBUG "${OpenBLAS_LIBNAME}_d")
141155

156+
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
157+
string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG )
158+
159+
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
160+
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
161+
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
162+
endforeach()
163+
164+
enable_testing()
165+
add_subdirectory(utest)
142166

143167
if(NOT MSVC)
144-
#only build shared library for MSVC
145-
add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
146-
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
147-
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
148-
149-
if(SMP)
150-
target_link_libraries(${OpenBLAS_LIBNAME} pthread)
151-
target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
168+
#only build shared library for MSVC
169+
170+
add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
171+
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
172+
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
173+
174+
if(SMP)
175+
target_link_libraries(${OpenBLAS_LIBNAME} pthread)
176+
target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
152177
endif()
153178

154179
#build test and ctest
155-
enable_testing()
156180
add_subdirectory(test)
157181
if(NOT NO_CBLAS)
158182
add_subdirectory(ctest)
@@ -188,3 +212,27 @@ set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
188212
#endif
189213
# @touch lib.grd
190214

215+
# Install project
216+
217+
# Install libraries
218+
install(TARGETS ${OpenBLAS_LIBNAME}
219+
RUNTIME DESTINATION bin
220+
ARCHIVE DESTINATION lib
221+
LIBRARY DESTINATION lib )
222+
223+
# Install include files
224+
FILE(GLOB_RECURSE INCLUDE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.h")
225+
install (FILES ${INCLUDE_FILES} DESTINATION include)
226+
227+
if(NOT MSVC)
228+
install (TARGETS ${OpenBLAS_LIBNAME}_static DESTINATION lib)
229+
endif()
230+
231+
include(FindPkgConfig QUIET)
232+
if(PKG_CONFIG_FOUND)
233+
set(prefix ${CMAKE_INSTALL_PREFIX})
234+
set(libdir ${CMAKE_INSTALL_PREFIX}/lib)
235+
set(includedir ${CMAKE_INSTALL_PREFIX}/include)
236+
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas.pc @ONLY)
237+
install (FILES ${PROJECT_BINARY_DIR}/openblas.pc DESTINATION lib/pkgconfig/)
238+
endif()

CONTRIBUTORS.md

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,17 @@ In chronological order:
121121
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
122122
ARMv8 support.
123123

124+
* Jerome Robert <jeromerobert@gmx.com>
125+
* [2015-01-01] Speed-up small `ger` and `gemv` using stack allocation (bug #478)
126+
* [2015-12-23] `stack_check` in `gemv.c` (bug #722)
127+
* [2015-12-28] Allow forcing the number of parallel make jobs
128+
* [2015-12-28] Fix detection of AMD E2-3200
129+
* [2015-12-31] Let `make MAX_STACK_ALLOC=0` do what is expected
130+
* [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731)
131+
* [2016-01-24] Use `GEMM_MULTITHREAD_THRESHOLD` as a number of ops (bug #742)
132+
* [2016-01-26] Let `openblas_get_num_threads` return the number of active threads (bug #760)
133+
* [2016-01-30] Speed-up small `zger`, `zgemv`, `ztrmv` using stack allocation (bug #727)
134+
124135
* Dan Kortschak
125136
* [2015-01-07] Added test for drotmg bug #484.
126137

@@ -130,5 +141,29 @@ In chronological order:
130141
* Martin Koehler <https://github.com/grisuthedragon/>
131142
* [2015-09-07] Improved imatcopy
132143

133-
* [Your name or handle] <[email or website]>
134-
* [Date] [Brief summary of your changes]
144+
* Ashwin Sekhar T K <https://github.com/ashwinyes/>
145+
* [2015-11-09] Assembly kernels for Cortex-A57 (ARMv8)
146+
* [2015-11-20] lapack-test fixes for Cortex-A57
147+
* [2016-03-14] Additional functional Assembly Kernels for Cortex-A57
148+
* [2016-03-14] Optimize Dgemm 4x4 for Cortex-A57
149+
150+
* theoractice <https://github.com/theoractice/>
151+
* [2016-03-20] Fix compiler error in VisualStudio with CMake
152+
* [2016-03-22] Fix access violation on Windows while static linking
153+
154+
* Paul Mustière <https://github.com/buffer51/>
155+
* [2016-02-04] Fix Android build on ARMV7
156+
* [2016-04-26] Android build with LAPACK for ARMV7 & ARMV8
157+
158+
* Shivraj Patil <https://github.com/sva-img/>
159+
* [2016-05-03] DGEMM optimization for MIPS P5600 and I6400 using MSA
160+
161+
* Kaustubh Raste <https://github.com/ksraste/>
162+
* [2016-05-09] DTRSM optimization for MIPS P5600 and I6400 using MSA
163+
* [2016-05-20] STRSM optimization for MIPS P5600 and I6400 using MSA
164+
165+
* Abdelrauf <https://github.com/quickwritereader>
166+
* [2017-01-01] dgemm and dtrmm kernels for IBM z13
167+
* [2017-02-26] ztrmm kernel for IBM z13
168+
169+

Changelog.txt

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,99 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.2.19
4+
1-Sep-2016
5+
common:
6+
* Improved cross compiling.
7+
* Fix the bug on musl libc.
8+
9+
POWER:
10+
* Optimize BLAS on Power8
11+
* Fixed Julia+OpenBLAS bugs on Power8
12+
13+
MIPS:
14+
* Optimize BLAS on MIPS P5600 and I6400 (Thanks, Shivraj Patil, Kaustubh Raste)
15+
16+
ARM:
17+
* Improved on ARM Cortex-A57. (Thanks, Ashwin Sekhar T K)
18+
19+
20+
====================================================================
21+
Version 0.2.18
22+
12-Apr-2016
23+
common:
24+
* If you set the MAKE_NB_JOBS flag to a value less than or equal to zero,
25+
make will be without -j.
26+
27+
x86/x86_64:
28+
* Support building Visual Studio static library. (#813, Thanks, theoractice)
29+
* Fix bugs to pass buildbot CI tests (http://build.openblas.net)
30+
31+
ARM:
32+
* Provide DGEMM 8x4 kernel for Cortex-A57 (Thanks, Ashwin Sekhar T K)
33+
34+
POWER:
35+
* Optimize S and C BLAS3 on Power8
36+
* Optimize BLAS2/1 on Power8
37+
38+
====================================================================
39+
Version 0.2.17
40+
20-Mar-2016
41+
common:
42+
* Enable BUILD_LAPACK_DEPRECATED=1 by default.
43+
44+
====================================================================
45+
Version 0.2.16
46+
15-Mar-2016
47+
common:
48+
* Avoid potential getenv segfault. (#716)
49+
* Import LAPACK svn bugfix #142-#147,#150-#155
50+
51+
x86/x86_64:
52+
* Optimize c/zgemv for AMD Bulldozer, Piledriver, Steamroller
53+
* Fix bug with scipy linalg test.
54+
55+
ARM:
56+
* Improve DGEMM for ARM Cortex-A57. (Thanks, Ashwin Sekhar T K)
57+
58+
POWER:
59+
* Optimize D and Z BLAS3 functions for Power8.
60+
61+
====================================================================
62+
Version 0.2.16.rc1
63+
23-Feb-2016
64+
common:
65+
* Upgrade LAPACK to 3.6.0 version.
66+
Add BUILD_LAPACK_DEPRECATED option in Makefile.rule to build
67+
LAPACK deprecated functions.
68+
* Add MAKE_NB_JOBS option in Makefile.
69+
Force number of make jobs. This is particularly
70+
useful when using distcc. (#735. Thanks, Jerome Robert.)
71+
* Redesign unit test. Run unit/regression test at every build (Travis-CI and Appveyor).
72+
* Disable multi-threading for small size swap and ger. (#744. Thanks, Jerome Robert)
73+
* Improve small zger, zgemv, ztrmv using stack allocation (#727. Thanks, Jerome Robert)
74+
* Let openblas_get_num_threads return the number of active threads.
75+
(#760. Thanks, Jerome Robert)
76+
* Support illumos(OmniOS). (#749. Thanks, Lauri Tirkkonen)
77+
* Fix LAPACK Dormbr, Dormlq bug. (#711, #713. Thanks, Brendan Tracey)
78+
* Update scipy benchmark script. (#745. Thanks, John Kirkham)
79+
80+
x86/x86_64:
81+
* Optimize trsm kernels for AMD Bulldozer, Piledriver, Steamroller.
82+
* Detect Intel Avoton.
83+
* Detect AMD Trinity, Richland, E2-3200.
84+
* Fix gemv performance bug on Mac OSX Intel Haswell.
85+
* Fix some bugs with CMake and Visual Studio
86+
87+
ARM:
88+
* Support and optimize Cortex-A57 AArch64.
89+
(#686. Thanks, Ashwin Sekhar TK)
90+
* Fix Android build on ARMV7 (#778. Thanks, Paul Mustiere)
91+
* Update ARMV6 kernels.
92+
93+
POWER:
94+
* Fix detection of POWER architecture
95+
(#684. Thanks, Sebastien Villemot)
96+
297
====================================================================
398
Version 0.2.15
499
27-Oct-2015

0 commit comments

Comments
 (0)