Skip to content

Commit dfc744b

Browse files
committed
[feat] support caching data from any URL using hash, add NO_ZMAT flag
1 parent 22d297e commit dfc744b

8 files changed

Lines changed: 81 additions & 32 deletions

File tree

README.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ included in the official distributions of Debian Bullseye and Ubuntu 21.04 or ne
7676
- 2024-03-22 [22435e4] [bug] fix jsonpath handling of recursive deep scans
7777
- 2024-03-21 [c9f8a20] [bug] support deep scan in cell and struct, merge struct/containers.Map
7878
- 2024-03-21 [394394a] [bug] improve jsonpath cell with deep scan
79-
- 2024-03-20 [a599e71] [feat] add jsoncache to handle _DataLink_ download cache, rename jsonpath
79+
- 2024-03-20 [a599e71] [feat] add jsoncache to handle ``_DataLink_`` download cache, rename jsonpath
8080
- 2024-02-19*[4f2edeb] [feat] support .. jsonpath operator for deep scan
8181
- 2024-01-11 [c43a758] [bug] fix missing index_esc reset, add test for automap
8282
- 2024-01-11*[ef5b472] [feat] automatically switch to map object when key length > 63
@@ -140,7 +140,7 @@ included in the official distributions of Debian Bullseye and Ubuntu 21.04 or ne
140140
- 2022-03-30 [4433a21] improve datalink uri handling to consider : inside uri
141141
- 2022-03-30 [6368409] make datalink URL query more robust
142142
- 2022-03-29 [dd9e9c6] when file suffix is missing, assume JSON feed
143-
- 2022-03-29*[07c58f3] initial support for _DataLink_ of online/local file with JSONPath ref
143+
- 2022-03-29*[07c58f3] initial support for ``_DataLink_`` of online/local file with JSONPath ref
144144
- 2022-03-29 [897b7ba] fix test for older octave
145145
- 2022-03-20 [bf03eff] force msgpack to use big-endian
146146
- 2022-03-13 [46bbfa9] support empty name key, which is valid in JSON, fix #79
@@ -398,7 +398,7 @@ for reading and writing below files types:
398398

399399
- JSON based files: ``.json``, ``.jdt`` (text JData file), ``.jmsh`` (text JMesh file),
400400
``.jnii`` (text JNIfTI file), ``.jnirs`` (text JSNIRF file)
401-
- BJData based files: ``.bjd`, ``.jdb` (binary JData file), ``.bmsh`` (binary JMesh file),
401+
- BJData based files: ``.bjd``, ``.jdb`` (binary JData file), ``.bmsh`` (binary JMesh file),
402402
``.bnii`` (binary JNIfTI file), ``.bnirs`` (binary JSNIRF file), ``.pmat`` (MATLAB session file)
403403
- UBJSON based files: ``.ubj``
404404
- MessagePack based files: ``.msgpack``

gzipdecode.m

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@
3636
if (nargin == 0)
3737
error('you must provide at least 1 input');
3838
end
39-
if (exist('zmat', 'file') == 2 || exist('zmat', 'file') == 3)
39+
40+
nozmat = getvarfrom({'caller', 'base'}, 'NO_ZMAT');
41+
42+
if ((exist('zmat', 'file') == 2 || exist('zmat', 'file') == 3) && (isempty(nozmat) || nozmat == 0))
4043
if (nargin > 1)
4144
[varargout{1:nargout}] = zmat(varargin{1}, varargin{2:end});
4245
else

gzipencode.m

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,9 @@
3535
error('you must provide at least 1 input');
3636
end
3737

38-
if (exist('zmat', 'file') == 2 || exist('zmat', 'file') == 3)
38+
nozmat = getvarfrom({'caller', 'base'}, 'NO_ZMAT');
39+
40+
if ((exist('zmat', 'file') == 2 || exist('zmat', 'file') == 3) && (isempty(nozmat) || nozmat == 0))
3941
[varargout{1:nargout}] = zmat(varargin{1}, 1, 'gzip');
4042
return
4143
elseif (isoctavemesh)

jdlink.m

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@
8888
if (opt.showlink)
8989
fprintf(1, 'downloading from URL: %s\n', uripath);
9090
end
91-
rawdata = webread(uripath);
91+
rawdata = webread(uripath, weboptions('ContentType', 'binary'));
9292
fname = [cachepath{1} filesep filename];
9393
fpath = fileparts(fname);
9494
if (~exist(fpath, 'dir'))

jsoncache.m

Lines changed: 59 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,25 @@
1111
%
1212
% input:
1313
% hyperlink: if a single input is provided, the function checks whether it is
14-
% a hyperlink starting with http:// or https://, if so, it
15-
% trys to extract the database name, document name and file
16-
% name using NeuroJSON's standard link format as
14+
% a hyperlink starting with http://, https:// or ftp://, if
15+
% so, it tries to extract the database name, document name and
16+
% file name using NeuroJSON's standard link format as
1717
%
1818
% https://neurojson.org/io/stat.cgi?dbname=..&docname=..&file=..&size=..
1919
%
20-
% if the string does not contain a link, it is treated as a
21-
% local file path
20+
% if the URL does not follow the above format, a SHA-256 hash
21+
% will be computed based on the full URL to produce filename;
22+
% dbname is set as the first 2 letters of the hash and
23+
% docname is set to the 3rd/4th letters of the hash; the
24+
% domain name is also extracted from the URL; if the URL
25+
% contains the file's suffix, it is appended to the filename.
26+
%
27+
% if the string does not contain a link, or the link starts
28+
% with file://, it is treated as a local file path
2229
% dbname: the name of the NeuroJSON database (must exist)
2330
% docname: the name of the NeuroJSON dataset document (must exist)
2431
% filename: the name of the data file - may contain a relative folder
25-
% domain: optional, if not given, 'io' is used; otherwise, user can
32+
% domain: optional, if not given, 'default' is used; otherwise, user can
2633
% specify customized domain name
2734
%
2835
% output:
@@ -46,6 +53,13 @@
4653
% if a global variable NEUROJSON_CACHE is set in 'base', it will be
4754
% used instead of the above search paths
4855
%
56+
%
57+
% example:
58+
% [cachepath, filename] = jsoncache('https://neurojson.org/io/stat.cgi?action=get&db=openneuro&doc=ds000001&file=sub-01/anat/sub-01_inplaneT2.nii.gz&size=669578')
59+
% [cachepath, filename] = jsoncache('https://raw.githubusercontent.com/fangq/jsonlab/master/examples/example1.json')
60+
% [cachepath, filename] = jsoncache('https://neurojson.io:7777/adhd200/Brown')
61+
% [cachepath, filename] = jsoncache('https://neurojson.io:7777/openneuro/ds003805')
62+
%
4963
% -- this function is part of iso2mesh toolbox (http://iso2mesh.sf.net)
5064
%
5165

@@ -66,13 +80,13 @@
6680
end
6781

6882
if (nargin < 4)
69-
domain = 'io';
83+
domain = 'default';
7084
end
7185

7286
if (nargin == 1)
7387
link = dbname;
74-
if (isempty(regexp(link, '://', 'once')))
75-
filename = link;
88+
if (~isempty(regexp(link, '^file://', 'once')) || isempty(regexp(link, '://', 'once')))
89+
filename = regexprep(link, '^file://', '');
7690
if (exist(filename, 'file'))
7791
cachepath = filename;
7892
filename = true;
@@ -81,24 +95,45 @@
8195
else
8296
if (~isempty(regexp(link, '^https*://neurojson.org/io/', 'once')))
8397
domain = 'io';
84-
end
85-
dbname = regexp(link, '(?<=db=)[^&]+', 'match');
86-
if (~isempty(dbname))
87-
dbname = dbname{1};
8898
else
89-
dbname = '';
99+
newdomain = regexprep(regexp(link, '^(https*|ftp)://[^\/?#:]+', 'match', 'once'), '^(https*|ftp)://', '');
100+
if (~isempty(newdomain))
101+
domain = newdomain;
102+
end
90103
end
91-
docname = regexp(link, '(?<=doc=)[^&]+', 'match');
92-
if (~isempty(docname))
93-
docname = docname{1};
94-
else
95-
docname = '';
104+
dbname = regexp(link, '(?<=db=)[^&]+', 'match', 'once');
105+
docname = regexp(link, '(?<=doc=)[^&]+', 'match', 'once');
106+
filename = regexp(link, '(?<=file=)[^&]+', 'match', 'once');
107+
if (isempty(filename) && strcmp(domain, 'neurojson.io'))
108+
ref = regexp(link, '^(https*|ftp)://neurojson.io(:\d+)*(?<dbname>/[^\/]+)(?<docname>/[^\/]+)(?<filename>/[^\/?]+)*', 'names', 'once');
109+
if (~isempty(ref))
110+
if (~isempty(ref.dbname))
111+
dbname = ref.dbname(2:end);
112+
end
113+
if (~isempty(ref.docname))
114+
docname = ref.docname(2:end);
115+
end
116+
if (~isempty(ref.filename))
117+
filename = ref.filename(2:end);
118+
elseif (~isempty(dbname))
119+
if (~isempty(docname))
120+
filename = [docname '.json'];
121+
else
122+
filename = [dbname '.json'];
123+
end
124+
end
125+
end
96126
end
97-
filename = regexp(link, '(?<=file=)[^&]+', 'match');
98-
if (~isempty(filename))
99-
filename = filename{1};
100-
else
101-
filename = '';
127+
if (isempty(filename))
128+
filename = jdatahash(link);
129+
suffix = regexp(link, '\.\w{1,5}(?=([#&].*)*$)', 'match', 'once');
130+
filename = [filename suffix];
131+
if (isempty(dbname))
132+
dbname = filename(1:2);
133+
end
134+
if (isempty(docname))
135+
docname = filename(3:4);
136+
end
102137
end
103138
end
104139
end

loadjson.m

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,10 @@
224224
if (nargout > 1 || opt.mmaponly)
225225
mmap = {};
226226
end
227+
if (regexp(inputstr, '^\s*$'))
228+
data = [];
229+
inputlen = 0;
230+
end
227231
jsoncount = 1;
228232
while pos <= inputlen
229233
[cc, pos, w1] = next_char(inputstr, pos);

zlibdecode.m

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@
3636
if (nargin == 0)
3737
error('you must provide at least 1 input');
3838
end
39-
if (exist('zmat', 'file') == 2 || exist('zmat', 'file') == 3)
39+
40+
nozmat = getvarfrom({'caller', 'base'}, 'NO_ZMAT');
41+
42+
if ((exist('zmat', 'file') == 2 || exist('zmat', 'file') == 3) && (isempty(nozmat) || nozmat == 0))
4043
if (nargin > 1)
4144
[varargout{1:nargout}] = zmat(varargin{1}, varargin{2:end});
4245
else

zlibencode.m

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,9 @@
3535
error('you must provide at least 1 input');
3636
end
3737

38-
if (exist('zmat', 'file') == 2 || exist('zmat', 'file') == 3)
38+
nozmat = getvarfrom({'caller', 'base'}, 'NO_ZMAT');
39+
40+
if ((exist('zmat', 'file') == 2 || exist('zmat', 'file') == 3) && (isempty(nozmat) || nozmat == 0))
3941
[varargout{1:nargout}] = zmat(varargin{1}, 1, 'zlib');
4042
return
4143
elseif (isoctavemesh)

0 commit comments

Comments
 (0)