From 05b211362f7c3f10ddf9309d38f133efcd79927f Mon Sep 17 00:00:00 2001 From: Ben Cottier Date: Tue, 11 May 2021 16:17:17 +0100 Subject: [PATCH 1/5] Use std of 0 for singleton vectors --- src/scaling.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/scaling.jl b/src/scaling.jl index 3fd4b7b..444e078 100644 --- a/src/scaling.jl +++ b/src/scaling.jl @@ -68,7 +68,8 @@ struct MeanStdScaling <: AbstractScaling end end -compute_stats(x) = (mean(x), std(x)) +# Set std to 0 using corrected=false if x is a singleton +compute_stats(x) = (mean(x), std(x; corrected=(length(x) != 1)) function _apply(A::AbstractArray, scaling::MeanStdScaling; inverse=false, eps=1e-3, kwargs...) inverse && return scaling.μ .+ scaling.σ .* A From 1a8183bab65c5846bf86e7b86e60a99f5b3fcf85 Mon Sep 17 00:00:00 2001 From: Ben Cottier Date: Tue, 11 May 2021 16:28:48 +0100 Subject: [PATCH 2/5] Add test for singleton vector --- test/scaling.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/scaling.jl b/test/scaling.jl index 7033413..90dba3f 100644 --- a/test/scaling.jl +++ b/test/scaling.jl @@ -312,6 +312,12 @@ scaling = MeanStdScaling(x) @test FeatureTransforms.apply_append(x, scaling, append_dim=1) == vcat(x, expected) end + + @testset "singleton" begin + x = [2.] + scaling = MeanStdScaling(x) + @test FeatureTransforms.apply(x, scaling) == [0.] 
+ end end @testset "Matrix" begin From 4b6b514b426d97ec5f89a7e9c5685b35d164eb79 Mon Sep 17 00:00:00 2001 From: Ben Cottier Date: Tue, 11 May 2021 16:29:05 +0100 Subject: [PATCH 3/5] Bump version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 5dec6c4..554f125 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "FeatureTransforms" uuid = "8fd68953-04b8-4117-ac19-158bf6de9782" authors = ["Invenia Technical Computing Corporation"] -version = "0.3.6" +version = "0.3.7" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" From e402c786ba48efa1395e5a0e4618eb96a5cae2e9 Mon Sep 17 00:00:00 2001 From: Ben Cottier Date: Tue, 11 May 2021 16:33:43 +0100 Subject: [PATCH 4/5] Add bracket --- src/scaling.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scaling.jl b/src/scaling.jl index 444e078..c577005 100644 --- a/src/scaling.jl +++ b/src/scaling.jl @@ -69,7 +69,7 @@ struct MeanStdScaling <: AbstractScaling end # Set std to 0 using corrected=false if x is a singleton -compute_stats(x) = (mean(x), std(x; corrected=(length(x) != 1)) +compute_stats(x) = (mean(x), std(x; corrected=(length(x) != 1))) function _apply(A::AbstractArray, scaling::MeanStdScaling; inverse=false, eps=1e-3, kwargs...) 
inverse && return scaling.μ .+ scaling.σ .* A From 5ce668d24b8ee9090bc007589971281c2bd187bf Mon Sep 17 00:00:00 2001 From: Ben Cottier Date: Wed, 12 May 2021 11:37:06 +0100 Subject: [PATCH 5/5] Expose corrected kwarg as alternative --- src/scaling.jl | 19 ++++++++++--------- test/scaling.jl | 26 ++++++++++++++++++++------ 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/src/scaling.jl b/src/scaling.jl index c577005..5cdfe06 100644 --- a/src/scaling.jl +++ b/src/scaling.jl @@ -31,8 +31,8 @@ struct MeanStdScaling <: AbstractScaling σ::Real """ - MeanStdScaling(A::AbstractArray; dims=:, inds=:) -> MeanStdScaling - MeanStdScaling(table, [cols]) -> MeanStdScaling + MeanStdScaling(A::AbstractArray; dims=:, inds=:, corrected=true) -> MeanStdScaling + MeanStdScaling(table; [cols], corrected=true) -> MeanStdScaling Construct a [`MeanStdScaling`](@ref) transform from the statistics of the given data. By default _all the data_ is considered when computing the mean and standard deviation. @@ -46,30 +46,31 @@ struct MeanStdScaling <: AbstractScaling # `AbstractArray` keyword arguments * `dims=:`: the dimension along which to take the `inds` slices. Default uses all dims. * `inds=:`: the indices to use in computing the statistics. Default uses all indices. + * `corrected=true`: passed to `Statistics.std`. # `Table` keyword arguments * `cols`: the columns to use in computing the statistics. Default uses all columns. + * `corrected=true`: passed to `Statistics.std`. !!! note If you want the `MeanStdScaling` to transform your data consistently you should use the same `inds`, `dims`, or `cols` keywords when calling `apply`. Otherwise, `apply` might rescale the wrong data or throw an error. """ - function MeanStdScaling(A::AbstractArray; dims=:, inds=:) - dims == Colon() && return new(compute_stats(A)...) - return new(compute_stats(selectdim(A, dims, inds))...) 
+ function MeanStdScaling(A::AbstractArray; dims=:, inds=:, corrected=true) + dims == Colon() && return new(compute_stats(A; corrected=corrected)...) + return new(compute_stats(selectdim(A, dims, inds); corrected=corrected)...) end - function MeanStdScaling(table; cols=_get_cols(table)) + function MeanStdScaling(table; cols=_get_cols(table), corrected=true) Tables.istable(table) || throw(MethodError(MeanStdScaling, table)) columntable = Tables.columns(table) data = reduce(vcat, [getproperty(columntable, c) for c in _to_vec(cols)]) - return new(compute_stats(data)...) + return new(compute_stats(data; corrected=corrected)...) end end -# Set std to 0 using corrected=false if x is a singleton -compute_stats(x) = (mean(x), std(x; corrected=(length(x) != 1))) +compute_stats(x; corrected) = (mean(x), std(x; corrected=corrected)) function _apply(A::AbstractArray, scaling::MeanStdScaling; inverse=false, eps=1e-3, kwargs...) inverse && return scaling.μ .+ scaling.σ .* A diff --git a/test/scaling.jl b/test/scaling.jl index 90dba3f..afa762d 100644 --- a/test/scaling.jl +++ b/test/scaling.jl @@ -255,6 +255,26 @@ @test scaling.σ == 0.5 end end + + @testset "std correction" begin + @testset "singleton" begin + x = [2.] + + scaling = MeanStdScaling(x) + @test scaling.μ == 2. + @test isnan(scaling.σ) + + scaling = MeanStdScaling(x; corrected=false) + @test scaling.μ == 2. + @test scaling.σ == 0. + end + + @testset "Array" begin + scaling = MeanStdScaling(M; corrected=false) + @test scaling.μ == 0.5 + @test scaling.σ ≈ 0.81650 atol=1e-5 + end + end end @testset "Vector" begin @@ -312,12 +332,6 @@ scaling = MeanStdScaling(x) @test FeatureTransforms.apply_append(x, scaling, append_dim=1) == vcat(x, expected) end - - @testset "singleton" begin - x = [2.] - scaling = MeanStdScaling(x) - @test FeatureTransforms.apply(x, scaling) == [0.] - end end @testset "Matrix" begin