Skip to content

Commit 079e030

Browse files
authored
Zippedzarrs (#49)
* Add possibility to open zipped zarrs * update Project.toml * Fix bug
1 parent 36d6892 commit 079e030

2 files changed

Lines changed: 101 additions & 54 deletions

File tree

Project.toml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "YAXArrayBase"
22
uuid = "90b8fcef-0c2d-428d-9c56-5f86629e9d14"
33
authors = ["Fabian Gans <fgans@bgc-jena.mpg.de>"]
4-
version = "0.7.7"
4+
version = "0.7.8"
55

66
[deps]
77
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
@@ -18,21 +18,24 @@ DimensionalData = "0.27, 0.28, 0.29"
1818
NetCDF = "0.11, 0.12"
1919
Zarr = "0.8, 0.9"
2020

21+
2122
[extensions]
2223
ArchGDALExt = "ArchGDAL"
2324
AxisArraysExt = "AxisArrays"
2425
AxisKeysExt = "AxisKeys"
2526
DimensionalDataExt = "DimensionalData"
2627
NamedDimsExt = "NamedDims"
2728
NetCDFExt = "NetCDF"
28-
ZarrExt = "Zarr"
29+
ZarrExt = ["Zarr", "ZipArchives", "DiskArrays"]
2930

3031
[weakdeps]
3132
ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3"
3233
AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9"
3334
AxisKeys = "94b1ba4f-4ee9-5380-92f1-94cde586c3c5"
3435
DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
3536
Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
37+
DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
3638
NamedDims = "356022a1-0364-5f58-8944-0da4b18d706f"
3739
NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9"
3840
Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99"
41+
ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c"

ext/ZarrExt.jl

Lines changed: 96 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,108 @@
11
module ZarrExt
2-
using YAXArrayBase
3-
using Zarr: ZArray, ZGroup, zgroup, zcreate, to_zarrtype, zopen, Compressor
4-
import YAXArrayBase: YAXArrayBase as YAB
5-
export ZarrDataset
6-
7-
function __init__()
8-
@debug "new driver key :zarr, updating backendlist."
9-
YAB.backendlist[:zarr] = ZarrDataset
10-
push!(YAB.backendregex, r"(.zarr$)|(.zarr/$)"=>ZarrDataset)
11-
end
2+
using YAXArrayBase
3+
using Zarr: ZArray, ZGroup, zgroup, zcreate, to_zarrtype, zopen, Compressor, ZipStore
4+
import DiskArrays: AbstractDiskArray, DiskArrays, Unchunked, Chunked, GridChunks
5+
using ZipArchives: ZipReader
6+
import YAXArrayBase: YAXArrayBase as YAB
7+
export ZarrDataset
128

13-
struct ZarrDataset
14-
g::ZGroup
15-
end
16-
ZarrDataset(g::String;mode="r") = ZarrDataset(zopen(g,mode,fill_as_missing=false))
17-
18-
YAB.get_var_dims(ds::ZarrDataset,name) = reverse(ds[name].attrs["_ARRAY_DIMENSIONS"])
19-
YAB.get_varnames(ds::ZarrDataset) = collect(keys(ds.g.arrays))
20-
function YAB.get_var_attrs(ds::ZarrDataset, name)
21-
#We add the fill value to the attributes to be consistent with NetCDF
22-
a = ds[name]
23-
if a.metadata.fill_value !== nothing
24-
merge(ds[name].attrs,Dict("_FillValue"=>a.metadata.fill_value))
25-
else
26-
ds[name].attrs
27-
end
9+
# Module initializer: registers the Zarr backend with YAXArrayBase by storing
# `ZarrDataset` under the `:zarr` driver key and adding a path pattern that
# maps to `ZarrDataset`.
function __init__()
    @debug "new driver key :zarr, updating backendlist."
    YAB.backendlist[:zarr] = ZarrDataset
    # The dots are escaped so they match a literal "." instead of any character.
    # Unescaped, names such as "notazarr" or "zarr_zip" would also have been
    # matched by this pattern.
    push!(YAB.backendregex, r"(\.zarr$)|(\.zarr/$)|(zarr\.zip$)" => ZarrDataset)
    return nothing
end
14+
15+
# Dataset backend type wrapping a Zarr group. The methods defined in this
# module all operate on the wrapped group `g`.
struct ZarrDataset
    g::ZGroup  # the wrapped Zarr group
end
18+
function ZarrDataset(g::String; mode="r")
19+
store = if endswith(g, "zip")
20+
ZipStore(ZipReader(SimpleFileDiskArray(g)))
21+
else
22+
g
2823
end
29-
YAB.get_global_attrs(ds::ZarrDataset) = ds.g.attrs
30-
Base.getindex(ds::ZarrDataset, i) = ds.g[i]
31-
Base.haskey(ds::ZarrDataset,k) = haskey(ds.g,k)
32-
33-
# function add_var(p::ZarrDataset, T::Type{>:Missing}, varname, s, dimnames, attr; kwargs...)
34-
# S = Base.nonmissingtype(T)
35-
# add_var(p,S, varname, s, dimnames, attr; fill_value = defaultfillval(S), fill_as_missing=true, kwargs...)
36-
# end
37-
38-
function YAB.add_var(p::ZarrDataset, T::Type, varname, s, dimnames, attr;
39-
chunksize=s, fill_as_missing=false, kwargs...)
40-
attr2 = merge(attr,Dict("_ARRAY_DIMENSIONS"=>reverse(collect(dimnames))))
41-
fv = get(attr,"_FillValue",get(attr,"missing_value",YAB.defaultfillval(T)))
42-
za = zcreate(T, p.g, varname,s...;fill_value = fv,fill_as_missing,attrs=attr2,chunks=chunksize,kwargs...)
43-
za
24+
ZarrDataset(zopen(store, mode, fill_as_missing=false))
25+
end
26+
27+
# Return the entries of variable `name`'s "_ARRAY_DIMENSIONS" attribute in
# reversed order.
function YAB.get_var_dims(ds::ZarrDataset, name)
    stored_dims = ds[name].attrs["_ARRAY_DIMENSIONS"]
    return reverse(stored_dims)
end
28+
# Return the keys of the wrapped group's `arrays` collection as a vector.
function YAB.get_varnames(ds::ZarrDataset)
    return collect(keys(ds.g.arrays))
end
29+
# Return the attributes of variable `name`. When the array metadata carries a
# fill value, a merged copy with an added "_FillValue" entry is returned so the
# result is consistent with NetCDF; otherwise the attributes are returned as-is.
function YAB.get_var_attrs(ds::ZarrDataset, name)
    var = ds[name]
    fillvalue = var.metadata.fill_value
    # No fill value recorded: hand back the stored attributes unchanged.
    fillvalue === nothing && return var.attrs
    return merge(var.attrs, Dict("_FillValue" => fillvalue))
end
38+
# Return the attributes stored on the wrapped group itself.
function YAB.get_global_attrs(ds::ZarrDataset)
    return ds.g.attrs
end
39+
# Indexing a `ZarrDataset` forwards to indexing the wrapped group.
function Base.getindex(ds::ZarrDataset, i)
    return ds.g[i]
end
40+
# Key lookup on a `ZarrDataset` forwards to the wrapped group.
function Base.haskey(ds::ZarrDataset, k)
    return haskey(ds.g, k)
end
4541

46-
#Special case for init with Arrays
47-
function YAB.add_var(p::ZarrDataset, a::AbstractArray, varname, dimnames, attr;
48-
kwargs...)
49-
T = to_zarrtype(a)
50-
b = add_var(p,T,varname,size(a),dimnames,attr;kwargs...)
51-
b .= a
52-
a
42+
# function add_var(p::ZarrDataset, T::Type{>:Missing}, varname, s, dimnames, attr; kwargs...)
43+
# S = Base.nonmissingtype(T)
44+
# add_var(p,S, varname, s, dimnames, attr; fill_value = defaultfillval(S), fill_as_missing=true, kwargs...)
45+
# end
46+
47+
# Create a new array named `varname` with element type `T` and size `s` inside
# the group wrapped by `p`, and return the created array.
#
# - `dimnames` is stored, reversed, under the "_ARRAY_DIMENSIONS" attribute.
# - The fill value is taken from `attr["_FillValue"]`, then
#   `attr["missing_value"]`, then `YAB.defaultfillval(T)`.
# - `chunksize` (default: `s`) is passed to `zcreate` as `chunks`; remaining
#   keyword arguments are forwarded to `zcreate` unchanged.
function YAB.add_var(p::ZarrDataset, T::Type, varname, s, dimnames, attr;
        chunksize=s, fill_as_missing=false, kwargs...)
    # Resolve the fill value through the fallback chain, innermost default first.
    default_fill = YAB.defaultfillval(T)
    missing_fill = get(attr, "missing_value", default_fill)
    fv = get(attr, "_FillValue", missing_fill)

    dims_entry = Dict("_ARRAY_DIMENSIONS" => reverse(collect(dimnames)))
    with_dims = merge(attr, dims_entry)
    # Drop attributes whose value is a floating-point NaN.
    # NOTE(review): presumably because NaN cannot be written to the attribute
    # metadata - confirm.
    stored_attrs = filter(((_, v),) -> !(v isa AbstractFloat && isnan(v)), with_dims)

    return zcreate(T, p.g, varname, s...;
        fill_value=fv,
        fill_as_missing=fill_as_missing,
        attrs=stored_attrs,
        chunks=chunksize,
        kwargs...)
end
57+
58+
#Special case for init with Arrays
59+
# Create a variable from an existing array: the element type comes from
# `to_zarrtype(a)`, the size from `size(a)`, and the contents of `a` are
# written into the newly created array. Keyword arguments are forwarded to the
# type-based `YAB.add_var` method.
function YAB.add_var(p::ZarrDataset, a::AbstractArray, varname, dimnames, attr;
        kwargs...)
    T = to_zarrtype(a)
    # Call through the `YAB` alias, matching how the methods are defined, so the
    # call does not depend on `add_var` being exported by YAXArrayBase.
    b = YAB.add_var(p, T, varname, size(a), dimnames, attr; kwargs...)
    b .= a
    # NOTE(review): this returns the input `a`, while the type-based method
    # returns the created array - confirm which one callers expect.
    return a
end
66+
67+
# Create a new Zarr group at `path` with `gatts` as its attributes and wrap it
# in a `ZarrDataset`.
function YAB.create_empty(::Type{ZarrDataset}, path, gatts=Dict())
    group = zgroup(path; attrs=gatts)
    return ZarrDataset(group)
end
5468

55-
YAB.create_empty(::Type{ZarrDataset}, path, gatts=Dict()) = ZarrDataset(zgroup(path, attrs=gatts))
5669

5770

71+
# Backend trait: reports `true` for any `ZarrDataset`.
function YAB.allow_parallel_write(::ZarrDataset)
    return true
end
72+
# Backend trait: reports `false` for any `ZarrDataset`.
function YAB.allow_missings(::ZarrDataset)
    return false
end
73+
# Wrap an already opened Zarr group in a `ZarrDataset`. Keyword arguments are
# accepted but not used.
function YAB.to_dataset(g::ZGroup; kwargs...)
    return ZarrDataset(g)
end
74+
# Reports `true` for any `ZArray` whose third type parameter is a `Compressor`
# subtype.
function YAB.iscompressed(a::ZArray{<:Any,<:Any,<:Compressor})
    return true
end
75+
76+
77+
#Add ability to read zipped zarrs
78+
79+
80+
# One-dimensional `UInt8` disk array backed by a file path. It is used to hand
# a zip file to `ZipReader` when opening zipped zarrs.
struct SimpleFileDiskArray{C<:Union{Int,Nothing}} <: AbstractDiskArray{UInt8,1}
    file::String  # path of the backing file
    s::Int        # length reported by `size`; the keyword constructor uses `filesize(file)`
    chunksize::C  # chunk length, or `nothing` when no chunking was requested
end
85+
# The array is one-dimensional; its length is the stored `s` field.
function Base.size(s::SimpleFileDiskArray)
    return (s.s,)
end
86+
# Build a `SimpleFileDiskArray` for `filename`, using the file's size as the
# array length. Throws an `ArgumentError` when `filename` is not an existing
# file. `chunksize` is stored as given (`nothing` by default).
function SimpleFileDiskArray(filename; chunksize=nothing)
    if !isfile(filename)
        throw(ArgumentError("File $filename does not exist"))
    end
    nbytes = filesize(filename)
    return SimpleFileDiskArray(filename, nbytes, chunksize)
end
91+
# Fill `aout` with bytes from the backing file, starting at the position of the
# first index in `i`. The file is opened for each call and closed again when
# the `do` block finishes.
function DiskArrays.readblock!(a::SimpleFileDiskArray, aout, i::AbstractUnitRange)
    # Array indices are 1-based while `seek` takes a 0-based position.
    offset = first(i) - 1
    return open(a.file) do io
        seek(io, offset)
        read!(io, aout)
    end
end
97+
# Report `Unchunked()` when no chunk size was given and `Chunked()` otherwise.
function DiskArrays.haschunks(a::SimpleFileDiskArray)
    if isnothing(a.chunksize)
        return Unchunked()
    else
        return Chunked()
    end
end
98+
# Return the chunking of the array: the result of
# `DiskArrays.estimate_chunksize` when no chunk size was given, otherwise a
# `GridChunks` over the full length with the stored chunk size.
function DiskArrays.eachchunk(a::SimpleFileDiskArray)
    isnothing(a.chunksize) && return DiskArrays.estimate_chunksize(a)
    return GridChunks((a.s,), (a.chunksize,))
end
105+
58106

59-
YAB.allow_parallel_write(::ZarrDataset) = true
60-
YAB.allow_missings(::ZarrDataset) = false
61-
YAB.to_dataset(g::ZGroup; kwargs...) = ZarrDataset(g)
62-
YAB.iscompressed(a::ZArray{<:Any,<:Any,<:Compressor}) = true
63107

64108
end

0 commit comments

Comments
 (0)