11module ZarrExt
2- using YAXArrayBase
3- using Zarr: ZArray, ZGroup, zgroup, zcreate, to_zarrtype, zopen, Compressor
4- import YAXArrayBase: YAXArrayBase as YAB
5- export ZarrDataset
6-
7- function __init__ ()
8- @debug " new driver key :zarr, updating backendlist."
9- YAB. backendlist[:zarr ] = ZarrDataset
10- push! (YAB. backendregex, r" (.zarr$)|(.zarr/$)" => ZarrDataset)
11- end
2+ using YAXArrayBase
3+ using Zarr: ZArray, ZGroup, zgroup, zcreate, to_zarrtype, zopen, Compressor, ZipStore
4+ import DiskArrays: AbstractDiskArray, DiskArrays, Unchunked, Chunked, GridChunks
5+ using ZipArchives: ZipReader
6+ import YAXArrayBase: YAXArrayBase as YAB
7+ export ZarrDataset
128
13- struct ZarrDataset
14- g:: ZGroup
15- end
16- ZarrDataset (g:: String ;mode= " r" ) = ZarrDataset (zopen (g,mode,fill_as_missing= false ))
17-
18- YAB. get_var_dims (ds:: ZarrDataset ,name) = reverse (ds[name]. attrs[" _ARRAY_DIMENSIONS" ])
19- YAB. get_varnames (ds:: ZarrDataset ) = collect (keys (ds. g. arrays))
20- function YAB. get_var_attrs (ds:: ZarrDataset , name)
21- # We add the fill value to the attributes to be consistent with NetCDF
22- a = ds[name]
23- if a. metadata. fill_value != = nothing
24- merge (ds[name]. attrs,Dict (" _FillValue" => a. metadata. fill_value))
25- else
26- ds[name]. attrs
27- end
"""
    __init__()

Module initialisation hook that registers `ZarrDataset` as a YAXArrayBase backend.

It stores `ZarrDataset` under the `:zarr` key of `YAB.backendlist` and appends a
path pattern paired with `ZarrDataset` to `YAB.backendregex`.
"""
function __init__()
    @debug "new driver key :zarr, updating backendlist."
    YAB.backendlist[:zarr] = ZarrDataset
    # The dots are escaped so they only match a literal `.`; unescaped, the
    # pattern also accepted paths such as "bizarr" or "fooXzarr".
    # Recognised endings: ".zarr", ".zarr/" and "zarr.zip".
    push!(YAB.backendregex, r"(\.zarr$)|(\.zarr/$)|(zarr\.zip$)" => ZarrDataset)
end
14+
"""
    ZarrDataset

Thin wrapper around a Zarr `ZGroup`; the YAXArrayBase dataset methods in this
module dispatch on this type.
"""
struct ZarrDataset
    # The wrapped Zarr group.
    g::ZGroup
end
"""
    ZarrDataset(g::String; mode="r")

Open the Zarr group found at path `g` and wrap it in a `ZarrDataset`.

A path ending in `.zip` is opened through a `ZipStore` built from a `ZipReader`
over a `SimpleFileDiskArray` of that file. Any other path is handed to `zopen`
unchanged. `mode` is forwarded to `zopen`, and `fill_as_missing` is always
passed as `false`.
"""
function ZarrDataset(g::String; mode="r")
    # Require the literal ".zip" extension: a bare "zip" suffix would also
    # catch directory stores whose name merely ends in those letters.
    store = endswith(g, ".zip") ? ZipStore(ZipReader(SimpleFileDiskArray(g))) : g
    return ZarrDataset(zopen(store, mode, fill_as_missing=false))
end
26+
"""
    YAB.get_var_dims(ds::ZarrDataset, name)

Return the `_ARRAY_DIMENSIONS` attribute of variable `name`, in reversed order.
"""
function YAB.get_var_dims(ds::ZarrDataset, name)
    stored = ds[name].attrs["_ARRAY_DIMENSIONS"]
    return reverse(stored)
end

"""
    YAB.get_varnames(ds::ZarrDataset)

Return the collected keys of the `arrays` field of the wrapped group.
"""
function YAB.get_varnames(ds::ZarrDataset)
    return collect(keys(ds.g.arrays))
end
"""
    YAB.get_var_attrs(ds::ZarrDataset, name)

Return the attributes of variable `name`.

When the array metadata has a fill value (not `nothing`), the result is a merged
copy of the attributes with an extra `_FillValue` entry, for consistency with
NetCDF. Otherwise the array's own attributes are returned as they are.
"""
function YAB.get_var_attrs(ds::ZarrDataset, name)
    arr = ds[name]
    fillval = arr.metadata.fill_value
    # No fill value recorded: nothing to add.
    fillval === nothing && return arr.attrs
    return merge(arr.attrs, Dict("_FillValue" => fillval))
end
"""
    YAB.get_global_attrs(ds::ZarrDataset)

Return the attributes of the wrapped group.
"""
function YAB.get_global_attrs(ds::ZarrDataset)
    return ds.g.attrs
end

# Indexing and key lookup are forwarded to the wrapped group.
function Base.getindex(ds::ZarrDataset, i)
    return ds.g[i]
end

function Base.haskey(ds::ZarrDataset, k)
    return haskey(ds.g, k)
end
4541
46- # Special case for init with Arrays
47- function YAB. add_var (p:: ZarrDataset , a:: AbstractArray , varname, dimnames, attr;
48- kwargs... )
49- T = to_zarrtype (a)
50- b = add_var (p,T,varname,size (a),dimnames,attr;kwargs... )
51- b .= a
52- a
42+ # function add_var(p::ZarrDataset, T::Type{>:Missing}, varname, s, dimnames, attr; kwargs...)
43+ # S = Base.nonmissingtype(T)
44+ # add_var(p,S, varname, s, dimnames, attr; fill_value = defaultfillval(S), fill_as_missing=true, kwargs...)
45+ # end
46+
"""
    YAB.add_var(p::ZarrDataset, T::Type, varname, s, dimnames, attr;
                chunksize=s, fill_as_missing=false, kwargs...)

Create an array named `varname` with element type `T` and size `s` in the group
wrapped by `p`, and return what `zcreate` returns.

- The dimension names are stored, in reversed order, under `_ARRAY_DIMENSIONS`.
- The fill value is looked up in `attr` under `_FillValue`, then `missing_value`,
  and finally falls back to `YAB.defaultfillval(T)`.
- Attributes whose value is a floating-point NaN are dropped before they are
  passed on as `attrs`.
- `chunksize` is passed as `chunks`; any remaining keywords go to `zcreate`.
"""
function YAB.add_var(p::ZarrDataset, T::Type, varname, s, dimnames, attr;
    chunksize=s, fill_as_missing=false, kwargs...)
    dim_entry = Dict("_ARRAY_DIMENSIONS" => reverse(collect(dimnames)))
    with_dims = merge(attr, dim_entry)
    fallback = get(attr, "missing_value", YAB.defaultfillval(T))
    fv = get(attr, "_FillValue", fallback)
    # Keep every attribute except floating-point NaN values.
    is_nan_float(v) = v isa AbstractFloat && isnan(v)
    cleaned = filter(kv -> !is_nan_float(last(kv)), with_dims)
    return zcreate(T, p.g, varname, s...; fill_value=fv, fill_as_missing, attrs=cleaned,
        chunks=chunksize, kwargs...)
end
57+
58+ # Special case for init with Arrays
"""
    YAB.add_var(p::ZarrDataset, a::AbstractArray, varname, dimnames, attr; kwargs...)

Create a variable from an in-memory array: the element type comes from
`to_zarrtype(a)`, the size from `size(a)`, and the contents of `a` are written
into the new array. Keywords are forwarded to the type-based `add_var` method.

Returns the input array `a`.
"""
function YAB.add_var(p::ZarrDataset, a::AbstractArray, varname, dimnames, attr;
    kwargs...)
    T = to_zarrtype(a)
    # Qualified like every other YAXArrayBase reference in this module, so the
    # call does not depend on `add_var` being an exported name.
    target = YAB.add_var(p, T, varname, size(a), dimnames, attr; kwargs...)
    target .= a
    # NOTE(review): this returns the input `a`, not the array that was created;
    # kept as is because callers may rely on it - confirm whether it is intended.
    return a
end
66+
"""
    YAB.create_empty(::Type{ZarrDataset}, path, gatts=Dict())

Create a Zarr group at `path` with `gatts` as its attributes and return it
wrapped in a `ZarrDataset`.
"""
function YAB.create_empty(::Type{ZarrDataset}, path, gatts=Dict())
    root = zgroup(path, attrs=gatts)
    return ZarrDataset(root)
end
5468
55- YAB. create_empty (:: Type{ZarrDataset} , path, gatts= Dict ()) = ZarrDataset (zgroup (path, attrs= gatts))
5669
5770
# Backend traits reported to YAXArrayBase for Zarr datasets.
function YAB.allow_parallel_write(::ZarrDataset)
    return true
end

function YAB.allow_missings(::ZarrDataset)
    return false
end

# A bare Zarr group converts to a dataset by wrapping it; keywords are ignored.
function YAB.to_dataset(g::ZGroup; kwargs...)
    return ZarrDataset(g)
end

# Arrays whose third type parameter is a `Compressor` subtype report as compressed.
function YAB.iscompressed(a::ZArray{<:Any,<:Any,<:Compressor})
    return true
end
75+
76+
77+ # Add ability to read zipped zarrs
78+
79+
"""
    SimpleFileDiskArray{C<:Union{Int,Nothing}} <: AbstractDiskArray{UInt8,1}

One-dimensional `UInt8` disk array over a file on disk. The zipped-Zarr code
path in this module hands it to `ZipReader`.
"""
struct SimpleFileDiskArray{C<:Union{Int,Nothing}} <: AbstractDiskArray{UInt8,1}
    # Path of the file that is opened on every block read.
    file::String
    # Length reported by `size`.
    s::Int
    # Chunk length, or `nothing` when no chunking was requested.
    chunksize::C
end
# The array is one-dimensional; its only extent is the stored length `s`.
function Base.size(s::SimpleFileDiskArray)
    return (s.s,)
end
"""
    SimpleFileDiskArray(filename; chunksize=nothing)

Build a `SimpleFileDiskArray` for `filename`, using the file size reported by
`filesize` as the array length.

Throws an `ArgumentError` when `filename` is not an existing file.
"""
function SimpleFileDiskArray(filename; chunksize=nothing)
    if !isfile(filename)
        throw(ArgumentError("File $filename does not exist"))
    end
    nbytes = filesize(filename)
    return SimpleFileDiskArray(filename, nbytes, chunksize)
end
"""
    DiskArrays.readblock!(a::SimpleFileDiskArray, aout, i::AbstractUnitRange)

Fill `aout` with data read from the file, starting at the position given by the
first index of `i`. The file is opened for the duration of the call only.
"""
function DiskArrays.readblock!(a::SimpleFileDiskArray, aout, i::AbstractUnitRange)
    # Array indices are 1-based, file positions are 0-based.
    offset = first(i) - 1
    open(a.file) do io
        seek(io, offset)
        # The amount read is determined by `aout`, not by the length of `i`.
        read!(io, aout)
    end
end
# Report `Chunked()` only when a chunk size was given at construction.
function DiskArrays.haschunks(a::SimpleFileDiskArray)
    a.chunksize === nothing && return Unchunked()
    return Chunked()
end
"""
    DiskArrays.eachchunk(a::SimpleFileDiskArray)

Return the chunking of `a`: a `GridChunks` built from the array length and the
stored chunk size, or the result of `DiskArrays.estimate_chunksize(a)` when no
chunk size was given.
"""
function DiskArrays.eachchunk(a::SimpleFileDiskArray)
    a.chunksize === nothing && return DiskArrays.estimate_chunksize(a)
    return GridChunks((a.s,), (a.chunksize,))
end
105+
58106
59- YAB. allow_parallel_write (:: ZarrDataset ) = true
60- YAB. allow_missings (:: ZarrDataset ) = false
61- YAB. to_dataset (g:: ZGroup ; kwargs... ) = ZarrDataset (g)
62- YAB. iscompressed (a:: ZArray{<:Any,<:Any,<:Compressor} ) = true
63107
64108end
0 commit comments