From 43faa3f292a273b2e8f3be178021799d629eb0b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Barnafi?= Date: Thu, 27 Jun 2024 14:43:55 -0400 Subject: [PATCH 01/14] . --- Project.toml | 6 +- src/QRupdate.jl | 218 ++++++++++++++++++++++++++++++++--------------- test/runtests.jl | 66 ++++++++++---- 3 files changed, 203 insertions(+), 87 deletions(-) diff --git a/Project.toml b/Project.toml index 3db357f..28dab6f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,11 +1,11 @@ name = "QRupdate" uuid = "29d2ae51-7246-454b-9a65-19ff7b2849a5" -authors = ["Michael P. Friedlander ", - "Michael A. Saunders "] +authors = ["Michael P. Friedlander ", "Michael A. Saunders "] version = "1.0.0" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" [compat] julia = "1.7" @@ -14,4 +14,4 @@ julia = "1.7" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test"] \ No newline at end of file +test = ["Test"] diff --git a/src/QRupdate.jl b/src/QRupdate.jl index d0e627c..5c9db7d 100644 --- a/src/QRupdate.jl +++ b/src/QRupdate.jl @@ -1,14 +1,19 @@ module QRupdate -using LinearAlgebra +using LinearAlgebra, TimerOutputs + +export qraddcol, qraddcol!, qraddrow, qrdelcol, qrdelcol!, csne, csne! + +function swapcols!(M::Matrix{T},i::Int,j::Int) where {T} + Base.permutecols!!(M, replace(axes(M,2), i=>j, j=>i)) +end -export qraddcol, qraddcol!, qraddrow, qrdelcol, qrdelcol!, csne """ Auxiliary function used to solve fully allocated but incomplete R matrices. See documentation of qraddcol! . """ -function solveR!(R::AbstractMatrix{T}, b::Vector{T}, sol::Vector{T}, realSize::Int64) where {T} +function solveR!(R::Matrix{T}, b::Vector{T}, sol::Vector{T}, realSize::Int64) where {T} # Note: R is upper triangular @inbounds sol[realSize] = b[realSize] / R[realSize, realSize] for i in (realSize-1):-1:1 @@ -24,7 +29,7 @@ end Auxiliary function used to solve transpose of fully allocated but incomplete R matrices. See documentation of qraddcol! . """ -function solveRT!(R::AbstractMatrix{T}, b::Vector{T}, sol::Vector{T}, realSize::Int64) where {T} +function solveRT!(R::Matrix{T}, b::Vector{T}, sol::Vector{T}, realSize::Int64) where {T} # Note: R is upper triangular @inbounds sol[1] = b[1] / R[1, 1] for i in 2:realSize @@ -132,17 +137,38 @@ R = [0 0 0 R = [r11 0 0 R = [r11 r12 0 0 0 0 0 0 0 0 r22 0 0 0 0] 0 0 0] 0 0 0] """ -function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::Vector{T}, N::Int64, β::T = zero(T)) where {T} +function qraddcol!(A::Matrix{T}, R::Matrix{T}, a::Vector{T}, N::Int64, work::Vector{T}, work2::Vector{T}, u::Vector{T}, z::Vector{T}, r::Vector{T}, β::T = zero(T)) where {T} + #c,u,z,du,dz are R^n. Only r is R^m + #c -> work; du -> work2. dz is redundant + #@timeit "get views" begin m, n = size(A) + if N < n + cols = 1:N + Atr = view(A, :, cols) #truncated + Rtr = view(R, cols, cols) + work_tr = view(work, cols) + work2_tr = view(work2, cols) + u_tr = view(u, cols) + z_tr = view(z, cols) + else + cols = 1:N + Atr = A + Rtr = R + work_tr = work + work2_tr = work2 + u_tr = u + z_tr = z + end + #end #timeit get views + + # End checking work vectors # First add vector to A - for i in 1:m - @inbounds A[i,N+1] = a[i] - end + view(A,:,N+1) .= a - anorm = norm(a) - anorm2 = anorm^2 + #@timeit "norms" begin + anorm2 = a'a β2 = β^2 if β != 0 anorm2 = anorm2 + β2 @@ -150,76 +176,74 @@ function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::Vector{T}, N:: end if N == 0 - #return reshape([anorm], 1, 1) + anorm = sqrt(anorm2) R[1,1] = anorm return end + #end #timeit norms - c = zeros(N) - u = zeros(N) - du = zeros(N) - - for i in 1:N #c = A'a - for j in 1:m - @inbounds c[i] += A[j,i] * a[j] - end - end - solveRT!(R, c, u, N) #u = R'\c - unorm2 = norm(u)^2 - d2 = anorm2 - unorm2 + # work := c = A'a + mul!(work_tr, Atr', a) + solveRT!(R, work, u, N) #u = R'\c = R'\work + #@timeit "norms 2" begin + unorm2 = u'u + unorm2_prev = anorm2 + #end #timeit norms 2 - z = zeros(N) - dz = zeros(N) - r = zeros(m) - if d2 > anorm2 - γ = sqrt(d2) - else + err = unorm2 / unorm2_prev + unorm2_prev = unorm2 + # Iterative refinement + if err >= 0.5 + solveR!(R, u, z, N) #z = R\u + copy!(r, a) + mul!(r, Atr, z_tr, -1, 1) #r = a - A*z + γ = norm(r) + end + + while err < 0.5 + println("Reorthogonalize:", err) solveR!(R, u, z, N) #z = R\u - #mul!(r, A, z, -1, 1) #r = a - A*z - for i in 1:m - @inbounds r[i] = a[i] - for j in 1:N - @inbounds r[i] -= A[i,j] * z[j] - end - end - #mul!(c, A', r) #c = A'r - c[:] .= 0.0 - for i in 1:N - for j in 1:m - @inbounds c[i] += A[j,i] * r[j] - end - end + + #@timeit "residual" begin + copy!(r, a) + mul!(r, Atr, z_tr, -1, 1) #r = a - A*z + #end #timeit residual + + mul!(work_tr, Atr', r) # work := c = A'r if β != 0 - axpy!(-β2, z, c) #c = c - β2*z + axpy!(-β2, z, work) #c = c - β2*z end - solveRT!(R, c, du, N) #du = R'\c - solveR!(R, du, dz, N) #dz = R\du - axpy!(1, dz, z) #z += dz # Refine z - # u = R*z # Original: Bjork's version. - axpy!(1, du, u) #u += du # Modification: Refine u + solveRT!(R, work, work2, N) # work2 := du = R'\c + axpy!(1, work2_tr, u_tr) #u += du # Modification: Refine u + solveR!(R, work2, work, N) # work := dz = R\du + axpy!(1, work_tr, z_tr) #z += dz # Refine z #r = a - A*z - for i in 1:m - @inbounds r[i] = a[i] - for j in 1:N - @inbounds r[i] -= A[i,j] * z[j] - end - end + #@timeit "residual 2" begin + copy!(r, a) + mul!(r, Atr, z_tr, -1, 1) + #end #@timeit residual 2 γ = norm(r) # Safe computation (we know gamma >= 0). if !iszero(β) γ = sqrt(γ^2 + β2*norm(z)^2 + β2) end + unorm2 = u'u + err = unorm2 / unorm2_prev + unorm2_prev = unorm2 end # This seems to be faster than concatenation, ie: # [ Rin u # zeros(1,n) γ ] - for i in 1:N - @inbounds R[i, N+1] = u[i] - end + #for i in 1:N + #@inbounds R[i, N+1] = u[i] + #end + #@timeit "final update" begin + view(R,1:N,N+1) .= view(u, 1:N) R[N+1,N+1] = γ + #end #timeit final update end """ @@ -286,22 +310,26 @@ end This function is identical to the previous one, but instead leaves R with a column of zeros. This is useful to avoid copying the matrix. """ -function qrdelcol!(A::AbstractMatrix{T},R::AbstractMatrix{T}, k::Int) where {T} +function qrdelcol!(A::Matrix{T},R::Matrix{T}, k::Int) where {T} + #@timeit "all" begin m = size(A,1) - n = size(A,2) # Should have m=n+1 + n = size(A,2) + # Shift columns. This is apparently faster than copying views. - for j in (k+1):n, i in 1:m - @inbounds R[i,j-1] = R[i, j] - @inbounds A[i,j-1] = A[i, j] - end - for i in 1:m - @inbounds R[i,n] = 0.0 - @inbounds A[i,n] = 0.0 - end + #@timeit "shift" begin + for j in (k+1):n + swapcols!(A, j-1, j) + swapcols!(R, j-1, j) + end + #end # timeit shift + R[:, n] .= 0.0 + A[:, n] .= 0.0 + + #@timeit "givens downdate" begin for j in k:(n-1) # Forward sweep to reduce k-th row to zeros @inbounds G, y = givens(R[j+1,j], R[k,j], 1, 2) @inbounds R[j+1,j] = y @@ -313,14 +341,18 @@ function qrdelcol!(A::AbstractMatrix{T},R::AbstractMatrix{T}, k::Int) where {T} end end end + #end # timeit givens downdate # Shift k-th row. We skipped the removed column. + #@timeit "shift row" begin for j in k:(n-1) for i in k:j @inbounds R[i,j] = R[i+1, j] end @inbounds R[j+1,j] = 0 end + #end # timeit shift row + #end #timeit all end @@ -355,5 +387,55 @@ function csne(Rin::AbstractMatrix{T}, A::AbstractMatrix{T}, b::Vector{T}) where return (x, r) end +function csne!(R::Matrix{T}, A::Matrix{T}, b::Vector{T}, sol::Vector{T}, work::Vector{T}, work2::Vector{T}, u::Vector{T}, r::Vector{T}, N::Int) where {T} + #c,u,sol,du,dsol are R^n. Only r is R^m + #c -> work; du -> work2. dsol is redundant. + + m, n = size(A) + if N == n + cols = 1:N + Atr = view(A, :, cols) #truncated + Rtr = view(R, cols, cols) + work_tr = view(work, cols) + work2_tr = view(work2, cols) + u_tr = view(u, cols) + sol_tr = view(sol, cols) + else + cols = 1:N + Atr = A + Rtr = R + work_tr = work + work2_tr = work2 + u_tr = u + sol_tr = sol + end + + mul!(work_tr, Atr', b) + + solveRT!(R, work, u, N) # work_n2 := x = R' \ work_n + + unorm2_prev = b'b + unorm2 = u'u + err = unorm2 / unorm2_prev + unorm2_prev = unorm2 + while err < 0.5 + println("Reorthogonalize csne:", err) + + solveR!(R, u, sol, N) # sol := x = R \ work_n2 + copy!(r, b) + mul!(r, Atr, sol_tr, -1, 1) # r = b - A sol + mul!(work_tr, Atr', r) # work_n := c = A'r + + # At this point, work_n = q, sol = x + solveRT!(R, work, work2, N) # work2 := du = R' \ work + + axpy!(1.0, work2, u) + unorm2 = u'u + err = unorm2 / unorm2_prev + unorm2_prev = unorm2 + end + solveR!(R, u, sol, N) # sol := x = R \ work_n2 +end + end # module diff --git a/test/runtests.jl b/test/runtests.jl index 7f40f75..51cb7aa 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,6 +3,56 @@ import QRupdate: solveR!, solveRT! using Test using LinearAlgebra +@testset "qraddcol!" begin + m = 100 + work = rand(m) + work2 = rand(m) + work3 = rand(m) + work4 = rand(m) + work5 = rand(m) + A = randn(m, 0) + R = Array{Float64, 2}(undef, 0, 0) + Rin = zeros(m,m) + Ain = zeros(m, m) + for i in 1:m + a = randn(m) + R = qraddcol(A, R, a) + A = [A a] + qraddcol!(Ain, Rin, a, i-1, work, work2, work3, work4, work5) + @test norm(R) - norm(Rin) < 1e-10 + @test norm(A) - norm(Ain) < 1e-10 + end +end + + + +@testset "csne!" begin + + # work vecs + m = 100 + work = rand(m) + work2 = rand(m) + work3 = rand(m) + work4 = rand(m) + work5 = rand(m) + sol = zeros(m) + A = randn(m, 0) + R = Array{Float64, 2}(undef, 0, 0) + b = randn(m) + Rin = zeros(m,m) + Ain = zeros(m, m) + for i in 1:m + a = randn(m) + R = qraddcol(A, R, a) + A = [A a] + qraddcol!(Ain, Rin, a, i-1, work, work2, work3, work4, work5) + x, r = csne(R, A, b) + csne!(Rin, Ain, b, sol, work, work2, work3, work4, i) + @test norm(x) - norm(sol) < 1e-8 + end +end + + @testset "qraddcol with β = 0" begin m = 100 A = randn(m, 0) @@ -96,23 +146,7 @@ end solveRT!(A,rhs,sol,m) @test norm(A' * sol - rhs) < 1e-10 end - end -@testset "qraddcol!" begin - m = 100 - A = randn(m, 0) - R = Array{Float64, 2}(undef, 0, 0) - Rin = zeros(m,m) - Ain = zeros(m, m) - for i in 1:m - a = randn(m) - R = qraddcol(A, R, a) - A = [A a] - qraddcol!(Ain, Rin, a, i-1) - @test norm(R) - norm(Rin) < 1e-10 - @test norm(A) - norm(Ain) < 1e-10 - end -end From ab6f1b11ab7119c74b1722a57c7fa3bb492e66fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Barnafi?= Date: Thu, 27 Jun 2024 20:23:07 -0400 Subject: [PATCH 02/14] . --- src/QRupdate.jl | 142 ++++++++++++++++++++++++----------------------- test/runtests.jl | 11 ++-- 2 files changed, 79 insertions(+), 74 deletions(-) diff --git a/src/QRupdate.jl b/src/QRupdate.jl index 5c9db7d..0b9c771 100644 --- a/src/QRupdate.jl +++ b/src/QRupdate.jl @@ -8,6 +8,9 @@ function swapcols!(M::Matrix{T},i::Int,j::Int) where {T} Base.permutecols!!(M, replace(axes(M,2), i=>j, j=>i)) end +ORTHO_TOL = 1.0 # err = |unew|^2 / |uold|^2 < ORTHO_TOL +ORTHO_MAX_IT = 1 +verbose = true """ Auxiliary function used to solve fully allocated but incomplete R matrices. @@ -143,6 +146,12 @@ function qraddcol!(A::Matrix{T}, R::Matrix{T}, a::Vector{T}, N::Int64, work::Vec #@timeit "get views" begin m, n = size(A) + @assert size(work,1) == n "Expected "*string(n)*", actual size: " * string(size(work)) + @assert size(work2,1) == n + @assert size(u,1) == n + @assert size(z,1) == n + @assert size(r,1) == m + if N < n cols = 1:N Atr = view(A, :, cols) #truncated @@ -152,7 +161,6 @@ function qraddcol!(A::Matrix{T}, R::Matrix{T}, a::Vector{T}, N::Int64, work::Vec u_tr = view(u, cols) z_tr = view(z, cols) else - cols = 1:N Atr = A Rtr = R work_tr = work @@ -162,11 +170,6 @@ function qraddcol!(A::Matrix{T}, R::Matrix{T}, a::Vector{T}, N::Int64, work::Vec end #end #timeit get views - # End checking work vectors - - # First add vector to A - view(A,:,N+1) .= a - #@timeit "norms" begin anorm2 = a'a β2 = β^2 @@ -178,6 +181,7 @@ function qraddcol!(A::Matrix{T}, R::Matrix{T}, a::Vector{T}, N::Int64, work::Vec if N == 0 anorm = sqrt(anorm2) R[1,1] = anorm + view(A,:,N+1) .= a return end #end #timeit norms @@ -190,60 +194,54 @@ function qraddcol!(A::Matrix{T}, R::Matrix{T}, a::Vector{T}, N::Int64, work::Vec unorm2_prev = anorm2 #end #timeit norms 2 + solveR!(R, u, z, N) #z = R\u + copy!(r, a) + mul!(r, Atr, z_tr, -1, 1) #r = a - A*z + γ = norm(r) + mul!(work_tr, Atr', r) # r := c = A'r + err = sqrt(work_tr'work_tr / anorm2) - err = unorm2 / unorm2_prev - unorm2_prev = unorm2 - # Iterative refinement - if err >= 0.5 - solveR!(R, u, z, N) #z = R\u - copy!(r, a) - mul!(r, Atr, z_tr, -1, 1) #r = a - A*z - γ = norm(r) - end - - while err < 0.5 - println("Reorthogonalize:", err) - solveR!(R, u, z, N) #z = R\u - #@timeit "residual" begin - copy!(r, a) - mul!(r, Atr, z_tr, -1, 1) #r = a - A*z - #end #timeit residual - - mul!(work_tr, Atr', r) # work := c = A'r + # Iterative refinement + if err < ORTHO_TOL + view(R,1:N,N+1) .= view(u, 1:N) + R[N+1,N+1] = γ + view(A,:,N+1) .= a + return + else + i = 0 + while err > ORTHO_TOL && i < ORTHO_MAX_IT - if β != 0 - axpy!(-β2, z, work) #c = c - β2*z - end + #if β != 0 + #axpy!(-β2, z, work) #c = c - β2*z + #end solveRT!(R, work, work2, N) # work2 := du = R'\c - axpy!(1, work2_tr, u_tr) #u += du # Modification: Refine u solveR!(R, work2, work, N) # work := dz = R\du - axpy!(1, work_tr, z_tr) #z += dz # Refine z - #r = a - A*z + axpy!(1.0, work_tr, z_tr) #z += dz # Refine z #@timeit "residual 2" begin + copy!(r, a) - mul!(r, Atr, z_tr, -1, 1) - #end #@timeit residual 2 + mul!(r, Atr, z_tr, -1.0, 1.0) #r = a - A*z + γ = norm(r) + work .= 0.0 + mul!(work_tr, Atr', r) # work := c = A'r - γ = norm(r) # Safe computation (we know gamma >= 0). - if !iszero(β) - γ = sqrt(γ^2 + β2*norm(z)^2 + β2) - end - unorm2 = u'u - err = unorm2 / unorm2_prev - unorm2_prev = unorm2 - end + err = sqrt(work_tr'work_tr / anorm2) + #verbose && println(" *** Reorthogonalize ",string(i)," . Error:", err) + verbose && print("*") + i += 1 - # This seems to be faster than concatenation, ie: - # [ Rin u - # zeros(1,n) γ ] - #for i in 1:N - #@inbounds R[i, N+1] = u[i] - #end - #@timeit "final update" begin + + #if !iszero(β) + #γ = sqrt(γ^2 + β2*norm(z)^2 + β2) + #end + end # while + end # if + + axpy!(1, work2_tr, u_tr) view(R,1:N,N+1) .= view(u, 1:N) R[N+1,N+1] = γ - #end #timeit final update + view(A,:,N+1) .= a end """ @@ -388,10 +386,15 @@ function csne(Rin::AbstractMatrix{T}, A::AbstractMatrix{T}, b::Vector{T}) where end function csne!(R::Matrix{T}, A::Matrix{T}, b::Vector{T}, sol::Vector{T}, work::Vector{T}, work2::Vector{T}, u::Vector{T}, r::Vector{T}, N::Int) where {T} - #c,u,sol,du,dsol are R^n. Only r is R^m + #c,u,sol,du are R^n. Only r is R^m #c -> work; du -> work2. dsol is redundant. m, n = size(A) + @assert size(sol,1) == n + @assert size(work,1) == n + @assert size(work2,1) == n + @assert size(u,1) == n + @assert size(r,1) == m if N == n cols = 1:N Atr = view(A, :, cols) #truncated @@ -401,7 +404,6 @@ function csne!(R::Matrix{T}, A::Matrix{T}, b::Vector{T}, sol::Vector{T}, work::V u_tr = view(u, cols) sol_tr = view(sol, cols) else - cols = 1:N Atr = A Rtr = R work_tr = work @@ -409,32 +411,34 @@ function csne!(R::Matrix{T}, A::Matrix{T}, b::Vector{T}, sol::Vector{T}, work::V u_tr = u sol_tr = sol end - + bnorm2 = b'b + # work := c = A'b mul!(work_tr, Atr', b) + solveRT!(R, work, u, N) - solveRT!(R, work, u, N) # work_n2 := x = R' \ work_n + solveR!(R, u, sol, N) #z = R\u + copy!(r, b) + mul!(r, Atr, sol_tr, -1, 1) #r = b - A*z + mul!(work_tr, Atr', r) # r := c = A'r + err = sqrt(work_tr'work_tr / bnorm2) - unorm2_prev = b'b - unorm2 = u'u - err = unorm2 / unorm2_prev - unorm2_prev = unorm2 - while err < 0.5 - println("Reorthogonalize csne:", err) + i = 0 + while err > ORTHO_TOL && i < ORTHO_MAX_IT + + solveRT!(R, work, work2, N) # work2 := du = R'\c + solveR!(R, work2, work, N) # work := dz = R\du + axpy!(1.0, work_tr, sol_tr) #z += dz # Refine z - solveR!(R, u, sol, N) # sol := x = R \ work_n2 copy!(r, b) - mul!(r, Atr, sol_tr, -1, 1) # r = b - A sol - mul!(work_tr, Atr', r) # work_n := c = A'r + mul!(r, Atr, sol_tr, -1.0, 1.0) #r = b - A*z + mul!(work_tr, Atr', r) # work := c = A'r - # At this point, work_n = q, sol = x - solveRT!(R, work, work2, N) # work2 := du = R' \ work + err = sqrt(work_tr'work_tr / bnorm2) + #verbose && println(" *** Reorthogonalize ",string(i), " CSNE. Error:", err) + verbose && print("*") + i += 1 - axpy!(1.0, work2, u) - unorm2 = u'u - err = unorm2 / unorm2_prev - unorm2_prev = unorm2 end - solveR!(R, u, sol, N) # sol := x = R \ work_n2 end diff --git a/test/runtests.jl b/test/runtests.jl index 51cb7aa..1aa1c90 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -19,8 +19,8 @@ using LinearAlgebra R = qraddcol(A, R, a) A = [A a] qraddcol!(Ain, Rin, a, i-1, work, work2, work3, work4, work5) - @test norm(R) - norm(Rin) < 1e-10 - @test norm(A) - norm(Ain) < 1e-10 + @test norm(R - view(Rin,1:i, 1:i)) < 1e-10 + @test norm(A - view(Ain, :, 1:i)) < 1e-10 end end @@ -48,7 +48,7 @@ end qraddcol!(Ain, Rin, a, i-1, work, work2, work3, work4, work5) x, r = csne(R, A, b) csne!(Rin, Ain, b, sol, work, work2, work3, work4, i) - @test norm(x) - norm(sol) < 1e-8 + @test norm(x - view(sol, 1:i)) < 1e-8 end end @@ -121,8 +121,9 @@ end A = A[:,1:i .!= k] R = qrdelcol(R, k) qrdelcol!(Ain, Rin, k) - @test norm(R) - norm(Rin) < 1e-10 - @test norm(A) - norm(Ain) < 1e-10 + im1 = i-1 + @test norm(R- view(Rin,1:im1,1:im1)) < 1e-10 + @test norm(A - view(Ain,:,1:im1)) < 1e-10 end end From 8d3cebfd8ed6129ddbc8b6c318e4683dc35091c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Barnafi?= Date: Fri, 28 Jun 2024 15:57:53 -0400 Subject: [PATCH 03/14] . --- src/QRupdate.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/QRupdate.jl b/src/QRupdate.jl index 0b9c771..b2c3b10 100644 --- a/src/QRupdate.jl +++ b/src/QRupdate.jl @@ -8,7 +8,7 @@ function swapcols!(M::Matrix{T},i::Int,j::Int) where {T} Base.permutecols!!(M, replace(axes(M,2), i=>j, j=>i)) end -ORTHO_TOL = 1.0 # err = |unew|^2 / |uold|^2 < ORTHO_TOL +ORTHO_TOL = 1e-6 # err = |unew|^2 / |uold|^2 < ORTHO_TOL ORTHO_MAX_IT = 1 verbose = true @@ -16,7 +16,7 @@ verbose = true Auxiliary function used to solve fully allocated but incomplete R matrices. See documentation of qraddcol! . """ -function solveR!(R::Matrix{T}, b::Vector{T}, sol::Vector{T}, realSize::Int64) where {T} +function solveR!(R::matT, b::vecT, sol::vecT, realSize::Int64) where {matT, vecT} # Note: R is upper triangular @inbounds sol[realSize] = b[realSize] / R[realSize, realSize] for i in (realSize-1):-1:1 @@ -32,7 +32,7 @@ end Auxiliary function used to solve transpose of fully allocated but incomplete R matrices. See documentation of qraddcol! . """ -function solveRT!(R::Matrix{T}, b::Vector{T}, sol::Vector{T}, realSize::Int64) where {T} +function solveRT!(R::matT, b::vecT, sol::vecT, realSize::Int64) where {matT, vecT} # Note: R is upper triangular @inbounds sol[1] = b[1] / R[1, 1] for i in 2:realSize @@ -140,7 +140,7 @@ R = [0 0 0 R = [r11 0 0 R = [r11 r12 0 0 0 0 0 0 0 0 r22 0 0 0 0] 0 0 0] 0 0 0] """ -function qraddcol!(A::Matrix{T}, R::Matrix{T}, a::Vector{T}, N::Int64, work::Vector{T}, work2::Vector{T}, u::Vector{T}, z::Vector{T}, r::Vector{T}, β::T = zero(T)) where {T} +function qraddcol!(A::AT, R::RT, a::aT, N::Int64, work::wT, work2::w2T, u::uT, z::zT, r::rT, β::T = zero(Float64)) where {AT,RT,aT,wT,w2T,uT,zT,rT,T} #c,u,z,du,dz are R^n. Only r is R^m #c -> work; du -> work2. dz is redundant @@ -385,7 +385,7 @@ function csne(Rin::AbstractMatrix{T}, A::AbstractMatrix{T}, b::Vector{T}) where return (x, r) end -function csne!(R::Matrix{T}, A::Matrix{T}, b::Vector{T}, sol::Vector{T}, work::Vector{T}, work2::Vector{T}, u::Vector{T}, r::Vector{T}, N::Int) where {T} +function csne!(R::RT, A::AT, b::bT, sol::solT, work::wT, work2::w2T, u::uT, r::rT, N::Int) where {RT,AT,bT,solT,wT,w2T,uT,rT} #c,u,sol,du are R^n. Only r is R^m #c -> work; du -> work2. dsol is redundant. From 4108db45440f8546877db848d86ed7f184aa2333 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Barnafi?= Date: Mon, 12 Aug 2024 23:43:07 -0400 Subject: [PATCH 04/14] . --- src/QRupdate.jl | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/QRupdate.jl b/src/QRupdate.jl index b2c3b10..26be449 100644 --- a/src/QRupdate.jl +++ b/src/QRupdate.jl @@ -188,12 +188,7 @@ function qraddcol!(A::AT, R::RT, a::aT, N::Int64, work::wT, work2::w2T, u::uT, z # work := c = A'a mul!(work_tr, Atr', a) - solveRT!(R, work, u, N) #u = R'\c = R'\work - #@timeit "norms 2" begin - unorm2 = u'u - unorm2_prev = anorm2 - #end #timeit norms 2 - + solveRT!(R, work_tr, u, N) #u = R'\c = R'\work solveR!(R, u, z, N) #z = R\u copy!(r, a) mul!(r, Atr, z_tr, -1, 1) #r = a - A*z From 16c2942a7022fd242cfbe683229146a34566958e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Barnafi?= Date: Mon, 30 Sep 2024 07:44:59 -0300 Subject: [PATCH 05/14] . --- src/QRupdate.jl | 12 ++---------- test/runtests.jl | 23 ++++++++++++----------- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/src/QRupdate.jl b/src/QRupdate.jl index 26be449..53a4055 100644 --- a/src/QRupdate.jl +++ b/src/QRupdate.jl @@ -32,7 +32,7 @@ end Auxiliary function used to solve transpose of fully allocated but incomplete R matrices. See documentation of qraddcol! . """ -function solveRT!(R::matT, b::vecT, sol::vecT, realSize::Int64) where {matT, vecT} +function solveRT!(R::matT, b::vecT, sol::vecT2, realSize::Int64) where {matT, vecT, vecT2} # Note: R is upper triangular @inbounds sol[1] = b[1] / R[1, 1] for i in 2:realSize @@ -140,7 +140,7 @@ R = [0 0 0 R = [r11 0 0 R = [r11 r12 0 0 0 0 0 0 0 0 r22 0 0 0 0] 0 0 0] 0 0 0] """ -function qraddcol!(A::AT, R::RT, a::aT, N::Int64, work::wT, work2::w2T, u::uT, z::zT, r::rT, β::T = zero(Float64)) where {AT,RT,aT,wT,w2T,uT,zT,rT,T} +function qraddcol!(A::AT, R::RT, a::aT, N::Int64, work::wT, work2::w2T, u::uT, z::zT, r::rT) where {AT,RT,aT,wT,w2T,uT,zT,rT,T} #c,u,z,du,dz are R^n. Only r is R^m #c -> work; du -> work2. dz is redundant @@ -172,11 +172,6 @@ function qraddcol!(A::AT, R::RT, a::aT, N::Int64, work::wT, work2::w2T, u::uT, z #@timeit "norms" begin anorm2 = a'a - β2 = β^2 - if β != 0 - anorm2 = anorm2 + β2 - anorm = sqrt(anorm2) - end if N == 0 anorm = sqrt(anorm2) @@ -207,9 +202,6 @@ function qraddcol!(A::AT, R::RT, a::aT, N::Int64, work::wT, work2::w2T, u::uT, z i = 0 while err > ORTHO_TOL && i < ORTHO_MAX_IT - #if β != 0 - #axpy!(-β2, z, work) #c = c - β2*z - #end solveRT!(R, work, work2, N) # work2 := du = R'\c solveR!(R, work2, work, N) # work := dz = R\du axpy!(1.0, work_tr, z_tr) #z += dz # Refine z diff --git a/test/runtests.jl b/test/runtests.jl index 1aa1c90..23d9833 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,18 +5,19 @@ using LinearAlgebra @testset "qraddcol!" begin m = 100 - work = rand(m) - work2 = rand(m) - work3 = rand(m) - work4 = rand(m) - work5 = rand(m) - A = randn(m, 0) - R = Array{Float64, 2}(undef, 0, 0) - Rin = zeros(m,m) - Ain = zeros(m, m) + T = ComplexF64 + work = rand(T, m) + work2 = rand(T, m) + work3 = rand(T, m) + work4 = rand(T, m) + work5 = rand(T, m) + A = randn(T, m, 0) + R = Array{T, 2}(undef, 0, 0) + Rin = zeros(T,m,m) + Ain = zeros(T,m,m) for i in 1:m - a = randn(m) - R = qraddcol(A, R, a) + a = randn(T, m) + R = qraddcol(A, R, a, zero(T)) A = [A a] qraddcol!(Ain, Rin, a, i-1, work, work2, work3, work4, work5) @test norm(R - view(Rin,1:i, 1:i)) < 1e-10 From f3edd5b9bc61a9a907781556f226636443e59550 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Barnafi?= Date: Tue, 5 Nov 2024 13:08:09 -0300 Subject: [PATCH 06/14] Fix dimensions problem and improve matrix shift for memory efficiency --- src/QRupdate.jl | 32 ++++++++++++++++---------------- test/runtests.jl | 15 ++++++++++----- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/src/QRupdate.jl b/src/QRupdate.jl index abb0603..7cfbafd 100644 --- a/src/QRupdate.jl +++ b/src/QRupdate.jl @@ -283,36 +283,36 @@ with a column of zeros. This is useful to avoid copying the matrix. """ function qrdelcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, k::Integer) where {T} + # Note that R is n x n m, n = size(A) + mR,nR = size(R) # Shift columns. This is apparently faster than copying views. - for j in (k+1):n, i in 1:m - @inbounds R[i,j-1] = R[i, j] - @inbounds A[i,j-1] = A[i, j] - end - for i in 1:m - @inbounds R[i,n] = zero(T) - @inbounds A[i,n] = zero(T) + @inbounds for j in (k+1):n + R[:,j-1] .= @view R[:, j] + A[:,j-1] .= @view A[:, j] end + A[:,n] .= zero(T) + R[:,n] .= zero(T) - for j in k:(n-1) # Forward sweep to reduce k-th row to zeros - @inbounds G, y = givens(R[j+1,j], R[k,j], 1, 2) - @inbounds R[j+1,j] = y + @inbounds for j in k:(nR-1) # Forward sweep to reduce k-th row to zeros + G, y = givens(R[j+1,j], R[k,j], 1, 2) + R[j+1,j] = y if j Date: Wed, 27 Nov 2024 15:54:39 -0300 Subject: [PATCH 07/14] Added flag to update original matrix only when wanted --- src/QRupdate.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/QRupdate.jl b/src/QRupdate.jl index 16157ca..f8d3fd2 100644 --- a/src/QRupdate.jl +++ b/src/QRupdate.jl @@ -133,7 +133,7 @@ R = [0 0 0 R = [r11 0 0 R = [r11 r12 0 0 0 0] 0 0 0] 0 0 0] """ #function qraddcol!(A::AT, R::RT, a::aT, N::Int64, work::wT, work2::w2T, u::uT, z::zT, r::rT) where {AT,RT,aT,wT,w2T,uT,zT,rT,T} -function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector{T}, N::Int64, work::AbstractVector{T}, work2::AbstractVector{T}, u::AbstractVector{T}, z::AbstractVector{T}, r::AbstractVector{T}) where {T} +function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector{T}, N::Int64, work::AbstractVector{T}, work2::AbstractVector{T}, u::AbstractVector{T}, z::AbstractVector{T}, r::AbstractVector{T}; updateMat::Bool=true) where {T} #c,u,z,du,dz are R^n. Only r is R^m #c -> work; du -> work2. dz is redundant @@ -170,7 +170,7 @@ function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector if N == 0 anorm = sqrt(anorm2) R[1,1] = anorm - view(A,:,N+1) .= a + updateMat && view(A,:,N+1) .= a return end #end #timeit norms @@ -222,7 +222,7 @@ function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector axpy!(1, work2_tr, u_tr) view(R,1:N,N+1) .= view(u, 1:N) R[N+1,N+1] = γ - view(A,:,N+1) .= a + updateMat && view(A,:,N+1) .= a end """ From 4bb3949644601265f7450b01fc62059f2cf39613 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Barnafi?= Date: Fri, 3 Jan 2025 14:41:13 -0300 Subject: [PATCH 08/14] minor fixed --- src/QRupdate.jl | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/src/QRupdate.jl b/src/QRupdate.jl index b2c3b10..34b0c97 100644 --- a/src/QRupdate.jl +++ b/src/QRupdate.jl @@ -8,15 +8,16 @@ function swapcols!(M::Matrix{T},i::Int,j::Int) where {T} Base.permutecols!!(M, replace(axes(M,2), i=>j, j=>i)) end -ORTHO_TOL = 1e-6 # err = |unew|^2 / |uold|^2 < ORTHO_TOL -ORTHO_MAX_IT = 1 +ORTHO_TOL = 1e-14 # err = |unew|^2 / |uold|^2 < ORTHO_TOL +ORTHO_MAX_IT = 10 +CSNE_MAX_IT = 10 verbose = true """ Auxiliary function used to solve fully allocated but incomplete R matrices. See documentation of qraddcol! . """ -function solveR!(R::matT, b::vecT, sol::vecT, realSize::Int64) where {matT, vecT} +function solveR!(R::matT, b::vecT, sol::vecSol, realSize::Int64) where {matT, vecT, vecSol} # Note: R is upper triangular @inbounds sol[realSize] = b[realSize] / R[realSize, realSize] for i in (realSize-1):-1:1 @@ -32,7 +33,7 @@ end Auxiliary function used to solve transpose of fully allocated but incomplete R matrices. See documentation of qraddcol! . """ -function solveRT!(R::matT, b::vecT, sol::vecT, realSize::Int64) where {matT, vecT} +function solveRT!(R::matT, b::vecT, sol::vecSol, realSize::Int64) where {matT, vecT, vecSol} # Note: R is upper triangular @inbounds sol[1] = b[1] / R[1, 1] for i in 2:realSize @@ -185,26 +186,26 @@ function qraddcol!(A::AT, R::RT, a::aT, N::Int64, work::wT, work2::w2T, u::uT, z return end #end #timeit norms + - # work := c = A'a - mul!(work_tr, Atr', a) - solveRT!(R, work, u, N) #u = R'\c = R'\work + mul!(work_tr, Atr', a) # work := c = A'a + solveRT!(R, work_tr, u_tr, N) #u = R'\c = R'\work #@timeit "norms 2" begin - unorm2 = u'u + unorm2 = u_tr'u_tr unorm2_prev = anorm2 #end #timeit norms 2 - solveR!(R, u, z, N) #z = R\u + solveR!(R, u_tr, z_tr, N) #z = R\u copy!(r, a) mul!(r, Atr, z_tr, -1, 1) #r = a - A*z γ = norm(r) - mul!(work_tr, Atr', r) # r := c = A'r + mul!(work_tr, Atr', r) # work := c = A'r err = sqrt(work_tr'work_tr / anorm2) # Iterative refinement if err < ORTHO_TOL - view(R,1:N,N+1) .= view(u, 1:N) + view(R,1:N,N+1) .= u_tr R[N+1,N+1] = γ view(A,:,N+1) .= a return @@ -215,20 +216,18 @@ function qraddcol!(A::AT, R::RT, a::aT, N::Int64, work::wT, work2::w2T, u::uT, z #if β != 0 #axpy!(-β2, z, work) #c = c - β2*z #end - solveRT!(R, work, work2, N) # work2 := du = R'\c - solveR!(R, work2, work, N) # work := dz = R\du + solveRT!(R, work_tr, work2_tr, N) # work2 := du = R'\c + axpy!(1.0, work2_tr, u_tr) # Refine u + solveR!(R, work2_tr, work_tr, N) # work := dz = R\du axpy!(1.0, work_tr, z_tr) #z += dz # Refine z #@timeit "residual 2" begin copy!(r, a) mul!(r, Atr, z_tr, -1.0, 1.0) #r = a - A*z γ = norm(r) - work .= 0.0 mul!(work_tr, Atr', r) # work := c = A'r err = sqrt(work_tr'work_tr / anorm2) - #verbose && println(" *** Reorthogonalize ",string(i)," . Error:", err) - verbose && print("*") i += 1 @@ -238,8 +237,10 @@ function qraddcol!(A::AT, R::RT, a::aT, N::Int64, work::wT, work2::w2T, u::uT, z end # while end # if + verbose && println(" *** $(i) reorthogonalization steps. Error:", err) + axpy!(1, work2_tr, u_tr) - view(R,1:N,N+1) .= view(u, 1:N) + view(R,1:N,N+1) .= u_tr R[N+1,N+1] = γ view(A,:,N+1) .= a end @@ -423,10 +424,10 @@ function csne!(R::RT, A::AT, b::bT, sol::solT, work::wT, work2::w2T, u::uT, r:: err = sqrt(work_tr'work_tr / bnorm2) i = 0 - while err > ORTHO_TOL && i < ORTHO_MAX_IT + while err > ORTHO_TOL && i < CSNE_MAX_IT - solveRT!(R, work, work2, N) # work2 := du = R'\c - solveR!(R, work2, work, N) # work := dz = R\du + solveRT!(R, work_tr, work2_tr, N) # work2 := du = R'\c + solveR!(R, work2_tr, work_tr, N) # work := dz = R\du axpy!(1.0, work_tr, sol_tr) #z += dz # Refine z copy!(r, b) @@ -434,11 +435,11 @@ function csne!(R::RT, A::AT, b::bT, sol::solT, work::wT, work2::w2T, u::uT, r:: mul!(work_tr, Atr', r) # work := c = A'r err = sqrt(work_tr'work_tr / bnorm2) - #verbose && println(" *** Reorthogonalize ",string(i), " CSNE. Error:", err) - verbose && print("*") i += 1 end + + verbose && println(" *** $(i) CSNE reorthogonalization steps. Error:", err) end From 4b0b9c4780f3dedb40f0136a791be5d9d1a0c550 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Barnafi?= Date: Thu, 9 Jan 2025 16:04:03 -0300 Subject: [PATCH 09/14] typo fix --- src/QRupdate.jl | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/src/QRupdate.jl b/src/QRupdate.jl index aa2459e..cb75873 100644 --- a/src/QRupdate.jl +++ b/src/QRupdate.jl @@ -188,35 +188,33 @@ function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector err = norm(work_tr) / sqrt(anorm2) # Iterative refinement + i = 0 if err < ORTHO_TOL view(R,1:N,N+1) .= u_tr R[N+1,N+1] = γ view(A,:,N+1) .= a - return else + while err > ORTHO_TOL && i < ORTHO_MAX_IT - i = 0 - while err > ORTHO_TOL && i < ORTHO_MAX_IT + solveRT!(Rtr, work_tr, work2_tr) # work2 := du = R'\c + axpy!(1.0, work2_tr, u_tr) # Refine u + solveR!(Rtr, work2_tr, work_tr) # work := dz = R\du + axpy!(1.0, work_tr, z_tr) #z += dz # Refine z + #@timeit "residual 2" begin - solveRT!(Rtr, work_tr, work2_tr) # work2 := du = R'\c - axpy!(1.0, work2_tr, u_tr) # Refine u - solveR!(Rtr, work2_tr, work_tr) # work := dz = R\du - axpy!(1.0, work_tr, z_tr) #z += dz # Refine z - #@timeit "residual 2" begin + copy!(r, a) + mul!(r, Atr, z_tr, -1.0, 1.0) #r = a - A*z + γ = norm(r) + mul!(work_tr, Atr', r) # work := c = A'r - copy!(r, a) - mul!(r, Atr, z_tr, -1.0, 1.0) #r = a - A*z - γ = norm(r) - mul!(work_tr, Atr', r) # work := c = A'r - - err = norm(work_tr) / sqrt(anorm2) - i += 1 + err = norm(work_tr) / sqrt(anorm2) + i += 1 - #if !iszero(β) - #γ = sqrt(γ^2 + β2*norm(z)^2 + β2) - #end - end # while + #if !iszero(β) + #γ = sqrt(γ^2 + β2*norm(z)^2 + β2) + #end + end # while end # if verbose && println(" *** $(i) reorthogonalization steps. Error:", err) From 738ce4f5005897927b3ce7483c19a995a26baa73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Barnafi?= Date: Fri, 10 Jan 2025 09:23:21 -0300 Subject: [PATCH 10/14] Typo fix --- src/QRupdate.jl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/QRupdate.jl b/src/QRupdate.jl index cb75873..4cd3ebe 100644 --- a/src/QRupdate.jl +++ b/src/QRupdate.jl @@ -9,8 +9,8 @@ function swapcols!(M::Matrix{T},i::Int,j::Int) where {T} end ORTHO_TOL = 1e-14 # err = |unew|^2 / |uold|^2 < ORTHO_TOL -ORTHO_MAX_IT = 10 -CSNE_MAX_IT = 10 +ORTHO_MAX_IT = 1 +CSNE_MAX_IT = 1 verbose = true """ @@ -133,7 +133,6 @@ R = [0 0 0 R = [r11 0 0 R = [r11 r12 0 0 0 0 0 0 0 0 r22 0 0 0 0] 0 0 0] 0 0 0] """ -#function qraddcol!(A::AT, R::RT, a::aT, N::Int64, work::wT, work2::w2T, u::uT, z::zT, r::rT) where {AT,RT,aT,wT,w2T,uT,zT,rT,T} function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector{T}, N::Int64, work::AbstractVector{T}, work2::AbstractVector{T}, u::AbstractVector{T}, z::AbstractVector{T}, r::AbstractVector{T}; updateMat::Bool=true) where {T} #c,u,z,du,dz are R^n. Only r is R^m #c -> work; du -> work2. dz is redundant @@ -169,6 +168,7 @@ function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector anorm2 = anorm^2 if N == 0 + # First iteration is simpler anorm = sqrt(anorm2) R[1,1] = anorm updateMat && view(A,:,N+1) .= a @@ -192,7 +192,7 @@ function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector if err < ORTHO_TOL view(R,1:N,N+1) .= u_tr R[N+1,N+1] = γ - view(A,:,N+1) .= a + updateMat && view(A,:,N+1) .= a else while err > ORTHO_TOL && i < ORTHO_MAX_IT @@ -215,14 +215,15 @@ function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector #γ = sqrt(γ^2 + β2*norm(z)^2 + β2) #end end # while + verbose && println(" *** $(i) reorthogonalization steps. Error:", err) + + axpy!(1, work2_tr, u_tr) + view(R,1:N,N+1) .= u_tr + R[N+1,N+1] = γ + updateMat && view(A,:,N+1) .= a end # if - verbose && println(" *** $(i) reorthogonalization steps. Error:", err) - axpy!(1, work2_tr, u_tr) - view(R,1:N,N+1) .= u_tr - R[N+1,N+1] = γ - updateMat && view(A,:,N+1) .= a end """ @@ -412,7 +413,7 @@ function csne!(R::RT, A::AT, b::bT, sol::solT, work::wT, work2::w2T, u::uT, r:: end - verbose && println(" *** $(i) CSNE reorthogonalization steps. Error:", err) + i > 0 && verbose && println(" *** $(i) CSNE reorthogonalization steps. Error:", err) end end # module From 4ae11bde62bca4876f26527a5f249a26b30f6d28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Barnafi?= Date: Sat, 1 Feb 2025 13:59:41 -0300 Subject: [PATCH 11/14] Added optional timers to check bottlenecks --- Project.toml | 2 ++ src/QRupdate.jl | 36 +++++++++++++++++++++--------------- test/memory-usage-test.jl | 24 ++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 15 deletions(-) create mode 100644 test/memory-usage-test.jl diff --git a/Project.toml b/Project.toml index 32c129e..1d3062d 100644 --- a/Project.toml +++ b/Project.toml @@ -5,8 +5,10 @@ version = "1.0.1" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" [compat] +TimerOutputs = "0.5.26" julia = "1.7" [extras] diff --git a/src/QRupdate.jl b/src/QRupdate.jl index 4cd3ebe..a28db8a 100644 --- a/src/QRupdate.jl +++ b/src/QRupdate.jl @@ -1,6 +1,7 @@ module QRupdate -using LinearAlgebra +using LinearAlgebra, TimerOutputs +to = TimerOutput() export qraddcol, qraddcol!, qraddrow, qrdelcol, qrdelcol!, csne, csne! @@ -145,6 +146,7 @@ function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector @assert size(z,1) == n @assert size(r,1) == m + @timeit to "Extract views" begin if N < n cols = 1:N Atr = view(A, :, cols) #truncated @@ -161,12 +163,12 @@ function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector u_tr = u z_tr = z end - #end #timeit get views + end # Extract views" - #@timeit "norms" begin anorm = norm(a) anorm2 = anorm^2 + @timeit to "Base case" begin if N == 0 # First iteration is simpler anorm = sqrt(anorm2) @@ -174,27 +176,31 @@ function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector updateMat && view(A,:,N+1) .= a return end - #end #timeit norms + end #timeit Base case # work := c = A'a - mul!(work_tr, Atr', a) - solveRT!(Rtr, work_tr, u_tr) #u = R'\c = R'\work - solveR!(Rtr, u_tr, z_tr) #z = R\u - copy!(r, a) - mul!(r, Atr, z_tr, -1, 1) #r = a - A*z + @timeit to "Default iteration" begin + @timeit to "A'a" mul!(work_tr, Atr', a) + @timeit to "solveRT" solveRT!(Rtr, work_tr, u_tr) #u = R'\c = R'\work + @timeit to "solveR" solveR!(Rtr, u_tr, z_tr) #z = R\u + @timeit to "copy" copy!(r, a) + @timeit to "mul!" mul!(r, Atr, z_tr, -1, 1) #r = a - A*z γ = norm(r) - mul!(work_tr, Atr', r) # r := c = A'r + @timeit to "mul: c=A'r" mul!(work_tr, Atr', r) # r := c = A'r err = norm(work_tr) / sqrt(anorm2) + end # timeit Default iteration # Iterative refinement i = 0 if err < ORTHO_TOL + @timeit to "No refinement update" begin view(R,1:N,N+1) .= u_tr R[N+1,N+1] = γ updateMat && view(A,:,N+1) .= a + end # timeit No refinement else - while err > ORTHO_TOL && i < ORTHO_MAX_IT + @timeit "Reorthogonalize" while err > ORTHO_TOL && i < ORTHO_MAX_IT solveRT!(Rtr, work_tr, work2_tr) # work2 := du = R'\c axpy!(1.0, work2_tr, u_tr) # Refine u @@ -217,10 +223,10 @@ function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector end # while verbose && println(" *** $(i) reorthogonalization steps. Error:", err) - axpy!(1, work2_tr, u_tr) - view(R,1:N,N+1) .= u_tr - R[N+1,N+1] = γ - updateMat && view(A,:,N+1) .= a + @timeit to axpy!(1, work2_tr, u_tr) + @timeit to "Update R" view(R,1:N,N+1) .= u_tr + @timeit to "Update R" R[N+1,N+1] = γ + @timeit "Update A" updateMat && view(A,:,N+1) .= a end # if diff --git a/test/memory-usage-test.jl b/test/memory-usage-test.jl new file mode 100644 index 0000000..db982bc --- /dev/null +++ b/test/memory-usage-test.jl @@ -0,0 +1,24 @@ +using QRupdate, TimerOutputs + +m = 1000 +n = 10 +T = ComplexF64 +work = rand(T, n) +work2 = rand(T, n) +work3 = rand(T, n) +work4 = rand(T, n) +work5 = rand(T, m) +Rin = zeros(T,n,n) +Ain = zeros(T,m,n) + +reset_timer!(QRupdate.to) +for i in 1:n + a = randn(T, m) + @timeit QRupdate.to "add col" qraddcol!(Ain, Rin, a, i-1, work, work2, work3, work4, work5) + Qin = view(Ain, :, 1:i)*inv(view(Rin, 1:i, 1:i)) +end + +println(QRupdate.to) +println("N of rows: $(m)") + + From 7d85fdb7b31b195ec16bcc41c84cc63fe3d87db8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Barnafi?= Date: Mon, 3 Feb 2025 17:54:39 -0300 Subject: [PATCH 12/14] Added parameters for reorthogonalization, option to obtain number of reortho and removed global parameters --- src/QRupdate.jl | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/src/QRupdate.jl b/src/QRupdate.jl index a28db8a..9cdb8f6 100644 --- a/src/QRupdate.jl +++ b/src/QRupdate.jl @@ -9,10 +9,6 @@ function swapcols!(M::Matrix{T},i::Int,j::Int) where {T} Base.permutecols!!(M, replace(axes(M,2), i=>j, j=>i)) end -ORTHO_TOL = 1e-14 # err = |unew|^2 / |uold|^2 < ORTHO_TOL -ORTHO_MAX_IT = 1 -CSNE_MAX_IT = 1 -verbose = true """ Triangular solve `Rx = b`, where `R` is upper triangular of size `realSize x realSize`. The storage of `R` is described in the documentation for `qraddcol!`. @@ -25,8 +21,7 @@ function solveR!(R::AbstractMatrix{T}, b::AbstractVector{T}, sol::AbstractVector # provide a view only, so they do not incur in further memory allocations. I # verified this with BenchmarkTooks. # N Barnafi 06/11/24 - sol .= b - ldiv!(UpperTriangular(R), sol) + ldiv!(sol, UpperTriangular(R), b) end """ @@ -35,8 +30,7 @@ Triangular solve `R'x = b`, where `R` is upper triangular of size `realSize x re function solveRT!(R::AbstractMatrix{T}, b::AbstractVector{T}, sol::AbstractVector{T}) where {T} # Note: R is upper triangular. # Note 2: We solve for the conjugate transpose. - sol .= b - ldiv!(LowerTriangular(R'), sol) + ldiv!(sol, LowerTriangular(R'), b) end @@ -134,11 +128,10 @@ R = [0 0 0 R = [r11 0 0 R = [r11 r12 0 0 0 0 0 0 0 0 r22 0 0 0 0] 0 0 0] 0 0 0] """ -function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector{T}, N::Int64, work::AbstractVector{T}, work2::AbstractVector{T}, u::AbstractVector{T}, z::AbstractVector{T}, r::AbstractVector{T}; updateMat::Bool=true) where {T} +function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector{T}, N::Int64, work::AbstractVector{T}, work2::AbstractVector{T}, u::AbstractVector{T}, z::AbstractVector{T}, r::AbstractVector{T}; updateMat::Bool=true, log::Bool=false, ortho_tol::Float64=1e-14, ortho_max_it::Int=1) where {T} #c,u,z,du,dz are R^n. Only r is R^m #c -> work; du -> work2. dz is redundant - #@timeit "get views" begin m, n = size(A) @assert size(work,1) == n "Expected "*string(n)*", actual size: " * string(size(work)) @assert size(work2,1) == n @@ -193,20 +186,19 @@ function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector # Iterative refinement i = 0 - if err < ORTHO_TOL + if err < ortho_tol @timeit to "No refinement update" begin view(R,1:N,N+1) .= u_tr R[N+1,N+1] = γ updateMat && view(A,:,N+1) .= a end # timeit No refinement else - @timeit "Reorthogonalize" while err > ORTHO_TOL && i < ORTHO_MAX_IT + @timeit "Reorthogonalize" while err > ortho_tol && i < ortho_max_it solveRT!(Rtr, work_tr, work2_tr) # work2 := du = R'\c axpy!(1.0, work2_tr, u_tr) # Refine u solveR!(Rtr, work2_tr, work_tr) # work := dz = R\du axpy!(1.0, work_tr, z_tr) #z += dz # Refine z - #@timeit "residual 2" begin copy!(r, a) mul!(r, Atr, z_tr, -1.0, 1.0) #r = a - A*z @@ -215,11 +207,6 @@ function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector err = norm(work_tr) / sqrt(anorm2) i += 1 - - - #if !iszero(β) - #γ = sqrt(γ^2 + β2*norm(z)^2 + β2) - #end end # while verbose && println(" *** $(i) reorthogonalization steps. Error:", err) @@ -227,9 +214,9 @@ function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector @timeit to "Update R" view(R,1:N,N+1) .= u_tr @timeit to "Update R" R[N+1,N+1] = γ @timeit "Update A" updateMat && view(A,:,N+1) .= a + log && return i end # if - - + return 0 end """ @@ -330,8 +317,6 @@ function qrdelcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, k::Integer) where end R[j+1,j] = zero(T) end - #end # timeit shift row - #end #timeit all end """ @@ -365,7 +350,7 @@ function csne(Rin::AbstractMatrix{T}, A::AbstractMatrix{T}, b::Vector{T}) where return (x, r) end -function csne!(R::RT, A::AT, b::bT, sol::solT, work::wT, work2::w2T, u::uT, r::rT, N::Int) where {RT,AT,bT,solT,wT,w2T,uT,rT} +function csne!(R::RT, A::AT, b::bT, sol::solT, work::wT, work2::w2T, u::uT, r::rT, N::Int; log::Bool=false, ortho_tol::Float64=1e-14, ortho_max_it::Int=1) where {RT,AT,bT,solT,wT,w2T,uT,rT} #c,u,sol,du are R^n. Only r is R^m #c -> work; du -> work2. dsol is redundant. @@ -404,7 +389,7 @@ function csne!(R::RT, A::AT, b::bT, sol::solT, work::wT, work2::w2T, u::uT, r:: err = norm(work_tr) / bnorm i = 0 - while err > ORTHO_TOL && i < CSNE_MAX_IT + while err > ortho_tol && i < ortho_max_it solveRT!(Rtr, work_tr, work2_tr) # work2 := du = R'\c solveR!(Rtr, work2_tr, work_tr) # work := dz = R\du @@ -420,6 +405,7 @@ function csne!(R::RT, A::AT, b::bT, sol::solT, work::wT, work2::w2T, u::uT, r:: end i > 0 && verbose && println(" *** $(i) CSNE reorthogonalization steps. Error:", err) + log && return i end end # module From 0575c54c1e7082348ed344dcdb7ee0e4004fc2d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Barnafi?= Date: Mon, 3 Feb 2025 17:59:28 -0300 Subject: [PATCH 13/14] Forgot to add verbose flag --- src/QRupdate.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/QRupdate.jl b/src/QRupdate.jl index 9cdb8f6..64cf722 100644 --- a/src/QRupdate.jl +++ b/src/QRupdate.jl @@ -128,7 +128,7 @@ R = [0 0 0 R = [r11 0 0 R = [r11 r12 0 0 0 0 0 0 0 0 r22 0 0 0 0] 0 0 0] 0 0 0] """ -function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector{T}, N::Int64, work::AbstractVector{T}, work2::AbstractVector{T}, u::AbstractVector{T}, z::AbstractVector{T}, r::AbstractVector{T}; updateMat::Bool=true, log::Bool=false, ortho_tol::Float64=1e-14, ortho_max_it::Int=1) where {T} +function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector{T}, N::Int64, work::AbstractVector{T}, work2::AbstractVector{T}, u::AbstractVector{T}, z::AbstractVector{T}, r::AbstractVector{T}; updateMat::Bool=true, verbose::Bool=false, log::Bool=false, ortho_tol::Float64=1e-14, ortho_max_it::Int=1) where {T} #c,u,z,du,dz are R^n. Only r is R^m #c -> work; du -> work2. dz is redundant @@ -350,7 +350,7 @@ function csne(Rin::AbstractMatrix{T}, A::AbstractMatrix{T}, b::Vector{T}) where return (x, r) end -function csne!(R::RT, A::AT, b::bT, sol::solT, work::wT, work2::w2T, u::uT, r::rT, N::Int; log::Bool=false, ortho_tol::Float64=1e-14, ortho_max_it::Int=1) where {RT,AT,bT,solT,wT,w2T,uT,rT} +function csne!(R::RT, A::AT, b::bT, sol::solT, work::wT, work2::w2T, u::uT, r::rT, N::Int; verbose::Bool=false, log::Bool=false, ortho_tol::Float64=1e-14, ortho_max_it::Int=1) where {RT,AT,bT,solT,wT,w2T,uT,rT} #c,u,sol,du are R^n. Only r is R^m #c -> work; du -> work2. dsol is redundant. From d2de4979fdc7d022d7c6858b83e59ab5217b5d7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Barnafi?= Date: Mon, 3 Feb 2025 18:36:56 -0300 Subject: [PATCH 14/14] minor bug --- src/QRupdate.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/QRupdate.jl b/src/QRupdate.jl index 64cf722..8a2eb09 100644 --- a/src/QRupdate.jl +++ b/src/QRupdate.jl @@ -167,7 +167,7 @@ function qraddcol!(A::AbstractMatrix{T}, R::AbstractMatrix{T}, a::AbstractVector anorm = sqrt(anorm2) R[1,1] = anorm updateMat && view(A,:,N+1) .= a - return + return 0 end end #timeit Base case