Skip to content

Commit 756e4b9

Browse files
committed
Add read-only support for zipped Dirfiles.
Separate from the Dirfile encoding scheme, GetData will read Dirfiles contained in uncompressed Zip files. This functionality is meant for reading archival data, so writing to these Zip files is not supported. Using the Info-ZIP `zip` utility, a Zip file can be created by running `zip -r0 ../dirfile.zip *` from within the root of an existing Dirfile. All encoding schemes are supported by this functionality except for the two encoding schemes that already use Zip files, *zzip* and *zzslim*. The encoding scheme must be specified using the /ENCODING directive, even if the Dirfile is unencoded. For /INCLUDE directives and LINTERP field lookup table files, only relative paths are supported, and only without `./` and `../` syntax. Although Zip files are most commonly created using _Deflate_ compression, the Zip standard (ISO/IEC 21320-1) also supports the _Store_ method, i.e., no compression at all. GetData's Zip file support requires _Store_ for all data files, although either _Store_ or _Deflate_ can be used for any *format* files or any LINTERP field lookup table files. With _Store_, a Zip file effectively concatenates a Dirfile's individual files together into a single file. Since a Zip file, unlike a tarball, contains an offset table, random reads are supported without the need to load the entire file from disk.
1 parent 1cd1a4b commit 756e4b9

42 files changed

Lines changed: 3323 additions & 78 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

src/Makefile.am

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ MODULE_LIBS=libgetdata.la
8080
if USE_MODULES
8181
EXPORT_DYNAMIC=-export-dynamic
8282
DGETDATA_MODULEDIR=-DGETDATA_MODULEDIR="\"$(moduledir)\""
83+
EXTERNAL_CPPFLAGS=$(ZZIP_CPPFLAGS)
84+
EXTERNAL_LDFLAGS=$(ZZIP_LDFLAGS)
85+
EXTERNAL_LIBS=$(ZZIP_LIBS)
8386
else
8487
EXTERNAL_CPPFLAGS=$(SLIM_CPPFLAGS) $(GZIP_CPPFLAGS) $(BZIP2_CPPFLAGS) \
8588
$(LZMA_CPPFLAGS) $(ZZIP_CPPFLAGS) $(FLAC_CPPFLAGS)

src/ascii.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ int _GD_AsciiOpen(int fd, struct gd_raw_file_* file, gd_type_t type gd_unused_,
2828
dtrace("%i, %p, <unused>, <unused>, %u", fd, file, mode);
2929

3030
if (!(mode & GD_FILE_TEMP))
31-
file->idata = gd_OpenAt(file->D, fd, file->name, ((mode & GD_FILE_WRITE)
31+
file->idata = gd_openat_wrapper(file->D, fd, file->name, ((mode & GD_FILE_WRITE)
3232
? (O_RDWR | O_CREAT) : O_RDONLY) | O_BINARY, 0666);
3333
else
3434
file->idata = _GD_MakeTempFile(file->D, fd, file->name);
@@ -49,6 +49,7 @@ int _GD_AsciiOpen(int fd, struct gd_raw_file_* file, gd_type_t type gd_unused_,
4949

5050
file->mode = mode | GD_FILE_READ;
5151
file->pos = 0;
52+
file->start_offset = ftello64(file->edata);
5253
dreturn("%i", 0);
5354
return 0;
5455
}
@@ -61,7 +62,7 @@ off64_t _GD_AsciiSeek(struct gd_raw_file_* file, off64_t count,
6162
dtrace("%p, %" PRId64 ", <unused>, 0x%X", file, (int64_t)count, mode);
6263

6364
if (count < file->pos) {
64-
rewind((FILE *)file->edata);
65+
fseeko64((FILE *)file->edata, file->start_offset, SEEK_SET); /* rewind */
6566
file->pos = 0;
6667
}
6768

@@ -285,7 +286,7 @@ off64_t _GD_AsciiSize(int dirfd, struct gd_raw_file_* file,
285286

286287
dtrace("%i, %p, <unused>, <unused>", dirfd, file);
287288

288-
fd = gd_OpenAt(file->D, dirfd, file->name, O_RDONLY, 0666);
289+
fd = gd_openat_wrapper(file->D, dirfd, file->name, O_RDONLY, 0666);
289290
if (fd < 0) {
290291
dreturn("%i", -1);
291292
return -1;

src/bzip.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ static struct gd_bzdata *_GD_Bzip2DoOpen(int dirfd, struct gd_raw_file_* file,
5656
file->error = BZ_IO_ERROR;
5757

5858
if (mode & GD_FILE_READ) {
59-
fd = gd_OpenAt(file->D, dirfd, file->name, O_RDONLY | O_BINARY, 0666);
59+
fd = gd_openat_wrapper(file->D, dirfd, file->name, O_RDONLY | O_BINARY, 0666);
6060
} else if (mode & GD_FILE_TEMP) {
6161
fd = _GD_MakeTempFile(file->D, dirfd, file->name);
6262
fdmode = "wb";

src/close.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,11 @@ static void _GD_FreeD(DIRFILE *D, int keep_dirfile)
7676
}
7777
free(D->dir);
7878

79+
#ifdef HAVE_ZZIP_LIB_H
80+
if (D->zzip_dir)
81+
zzip_dir_close(D->zzip_dir);
82+
#endif
83+
7984
if (!keep_dirfile)
8085
free(D);
8186

src/common.c

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ static int lutcmp(const void* a, const void* b)
346346
*/
347347
int _GD_ReadLinterpFile(DIRFILE *restrict D, gd_entry_t *restrict E)
348348
{
349-
FILE *fp;
349+
FILE *fp = NULL;
350350
struct gd_lut_ *ptr;
351351
int i, fd;
352352
int dir = -1;
@@ -364,15 +364,23 @@ int _GD_ReadLinterpFile(DIRFILE *restrict D, gd_entry_t *restrict E)
364364
return 1;
365365
}
366366

367-
fd = gd_OpenAt(D, E->e->u.linterp.table_dirfd, E->e->u.linterp.table_file,
368-
O_RDONLY, 0666);
369-
if (fd == -1) {
370-
_GD_SetError(D, GD_E_IO, GD_E_IO_OPEN, E->EN(linterp,table), 0, NULL);
371-
dreturn("%i", 1);
372-
return 1;
367+
if (!D->zzip_dir) {
368+
fd = gd_openat_wrapper(D, E->e->u.linterp.table_dirfd, E->e->u.linterp.table_file,
369+
O_RDONLY, 0666);
370+
if (fd == -1) {
371+
_GD_SetError(D, GD_E_IO, GD_E_IO_OPEN, E->EN(linterp,table), 0, NULL);
372+
dreturn("%i", 1);
373+
return 1;
374+
}
375+
fp = fdopen(fd, "rb");
376+
} else {
377+
if (!gd_zip_read_file(D, E->e->u.linterp.table_dirfd, E->e->u.linterp.table_file, &fp)) {
378+
_GD_SetError(D, GD_E_IO, GD_E_IO_OPEN, E->EN(linterp,table), 0, NULL);
379+
dreturn("%i", 1);
380+
return 1;
381+
}
373382
}
374383

375-
fp = fdopen(fd, "rb");
376384
if (fp == NULL) {
377385
_GD_SetError(D, GD_E_IO, GD_E_IO_OPEN, E->EN(linterp,table), 0, NULL);
378386
dreturn("%i", 1);
@@ -1430,9 +1438,13 @@ int _GD_GrabDir(DIRFILE *D, int dirfd, const char *name, int canonical)
14301438
free(path);
14311439

14321440
if (D->dir[D->ndir].fd == -1) {
1433-
free(D->dir[D->ndir].path);
1434-
dreturn("%i", -1);
1435-
return -1;
1441+
if (D->zzip_dir) {
1442+
D->dir[D->ndir].fd = dup(dirfd);
1443+
} else {
1444+
free(D->dir[D->ndir].path);
1445+
dreturn("%i", -1);
1446+
return -1;
1447+
}
14361448
}
14371449
#endif
14381450
D->ndir++;

src/flac.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ static struct gd_flacdata *_GD_FlacDoOpen(int dirfd, struct gd_raw_file_* file,
151151
dtrace("%i, %p, 0x%X, %i, 0x%X", dirfd, file, data_type, swap, mode);
152152

153153
if (mode & GD_FILE_READ) {
154-
fd = gd_OpenAt(file->D, dirfd, file->name, O_RDONLY | O_BINARY, 0666);
154+
fd = gd_openat_wrapper(file->D, dirfd, file->name, O_RDONLY | O_BINARY, 0666);
155155
} else if (mode & GD_FILE_TEMP) {
156156
fd = _GD_MakeTempFile(file->D, dirfd, file->name);
157157
fdmode = "wb";

src/gzip.c

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ int _GD_GzipOpen(int fd, struct gd_raw_file_* file,
4646
dtrace("%i, %p, <unused>, <unused>, 0x%X", fd, file, mode);
4747

4848
if (mode & GD_FILE_READ) {
49-
file->idata = gd_OpenAt(file->D, fd, file->name, O_RDONLY | O_BINARY, 0666);
49+
file->idata = gd_openat_wrapper(file->D, fd, file->name, O_RDONLY | O_BINARY, 0666);
5050
gzmode = "r";
5151
} else if (mode & GD_FILE_TEMP) {
5252
file->idata = _GD_MakeTempFile(file->D, fd, file->name);
@@ -185,17 +185,32 @@ off64_t _GD_GzipSize(int dirfd, struct gd_raw_file_ *file, gd_type_t data_type,
185185

186186
dtrace("%i, %p, 0x%X, <unused>", dirfd, file, data_type);
187187

188-
fd = gd_OpenAt(file->D, dirfd, file->name, O_RDONLY | O_BINARY, 0666);
188+
fd = gd_openat_wrapper(file->D, dirfd, file->name, O_RDONLY | O_BINARY, 0666);
189189
if (fd < 0) {
190190
dreturn("%i", -1);
191191
return -1;
192192
}
193193

194194
/* seek to the end */
195-
if (lseek64(fd, -4, SEEK_END) == -1) {
196-
dreturn("%i", -1);
197-
return -1;
195+
#ifdef HAVE_ZZIP_LIB_H
196+
if (file->D->zzip_dir) {
197+
ZZIP_FILE *zzip_file = zzip_file_open(file->D->zzip_dir, file->name, O_RDONLY | O_BINARY);
198+
if (zzip_file && zzip_file->method == 0) {
199+
lseek64(dirfd, zzip_file->dataoffset + zzip_file->csize - 4, SEEK_SET);
200+
zzip_file_close(zzip_file);
201+
} else {
202+
dreturn("%i", -1);
203+
return -1;
204+
}
205+
} else {
206+
#endif
207+
if (lseek64(fd, -4, SEEK_END) == -1) {
208+
dreturn("%i", -1);
209+
return -1;
210+
}
211+
#ifdef HAVE_ZZIP_LIB_H
198212
}
213+
#endif
199214
if (read(fd, &size, 4) < 4) {
200215
dreturn("%i", -1);
201216
return -1;

src/include.c

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -253,14 +253,14 @@ int _GD_Include(DIRFILE *D, struct parser_state *p, const char *ename,
253253

254254
/* Open the containing directory */
255255
dirfd = _GD_GrabDir(D, D->fragment[parent].dirfd, temp_buf1, 1);
256-
if (dirfd == -1 && D->error == GD_E_OK)
256+
if (dirfd == -1 && D->error == GD_E_OK && !D->zzip_dir)
257257
_GD_SetError(D, GD_E_IO, GD_E_IO_INCL, p->file, p->line, ename);
258258
if (D->error)
259259
goto include_error;
260260

261261
/* Reject weird stuff */
262262
if (gd_StatAt(D, dirfd, base, &statbuf, 0)) {
263-
if (!(p->flags & GD_CREAT)) {
263+
if (!(p->flags & GD_CREAT) && !D->zzip_dir) {
264264
_GD_SetError(D, GD_E_IO, GD_E_IO_INCL, p->file, p->line, temp_buf1);
265265
_GD_ReleaseDir(D, dirfd);
266266
goto include_error;
@@ -280,19 +280,33 @@ int _GD_Include(DIRFILE *D, struct parser_state *p, const char *ename,
280280
}
281281

282282
/* Try to open the file */
283-
i = gd_OpenAt(D, dirfd, base,
284-
((p->flags & (GD_CREAT | GD_TRUNC)) ? O_RDWR : O_RDONLY) |
285-
((p->flags & GD_CREAT) ? O_CREAT : 0) |
286-
((p->flags & GD_TRUNC) ? O_TRUNC : 0) |
287-
((p->flags & GD_EXCL) ? O_EXCL : 0) | O_BINARY, 0666);
283+
if (D->zzip_dir) {
284+
i = open(D->name, O_RDONLY | O_BINARY);
285+
} else {
286+
i = gd_OpenAt(D, dirfd, base,
287+
((p->flags & (GD_CREAT | GD_TRUNC)) ? O_RDWR : O_RDONLY) |
288+
((p->flags & GD_CREAT) ? O_CREAT : 0) |
289+
((p->flags & GD_TRUNC) ? O_TRUNC : 0) |
290+
((p->flags & GD_EXCL) ? O_EXCL : 0) | O_BINARY, 0666);
291+
}
288292

289293
if (i < 0) {
290294
_GD_SetError(D, GD_E_IO, GD_E_IO_INCL, p->file, p->line, temp_buf1);
291295
_GD_ReleaseDir(D, dirfd);
292296
goto include_error;
293297
}
294298

295-
new_fp = fdopen(i, (p->flags & (GD_CREAT | GD_TRUNC)) ? "rb+" : "rb");
299+
if (!D->zzip_dir) {
300+
new_fp = fdopen(i, (p->flags & (GD_CREAT | GD_TRUNC)) ? "rb+" : "rb");
301+
} else {
302+
if (!gd_zip_read_file(D, dirfd, base, &new_fp)) {
303+
_GD_SetError(D, GD_E_IO, GD_E_IO_INCL, p->file, p->line, temp_buf1);
304+
_GD_ReleaseDir(D, dirfd);
305+
close(i);
306+
goto include_error;
307+
}
308+
close(i);
309+
}
296310

297311
/* If opening the file failed, set the error code and abort parsing. */
298312
if (new_fp == NULL) {

src/internal.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,10 @@
113113
#include <regex.h>
114114
#endif
115115

116+
#ifdef HAVE_ZZIP_LIB_H
117+
#include <zzip/lib.h>
118+
#include <zzip/file.h>
119+
#endif
116120

117121
/* MSCVRT defines size_t but not ssize_t */
118122
#ifdef __MSVCRT__
@@ -994,6 +998,7 @@ struct gd_raw_file_ {
994998
DIRFILE *D;
995999
unsigned int mode;
9961000
off64_t pos;
1001+
off64_t start_offset;
9971002
};
9981003

9991004
/* linterp table datum */
@@ -1283,6 +1288,13 @@ struct gd_dirfile_ {
12831288
/* syntax error callback */
12841289
gd_parser_callback_t sehandler;
12851290
void* sehandler_extra;
1291+
1292+
/* for zipped Dirfile support */
1293+
#ifdef HAVE_ZZIP_LIB_H
1294+
ZZIP_DIR *zzip_dir;
1295+
#else
1296+
void *zzip_dir;
1297+
#endif
12861298
};
12871299

12881300
/* The caller's preferred memory manager */
@@ -1434,6 +1446,9 @@ gd_entry_t *_GD_ParseFieldSpec(DIRFILE *restrict,
14341446
char *_GD_ParseFragment(FILE *restrict, DIRFILE*, struct parser_state *restrict,
14351447
int, int);
14361448
void _GD_PerformRename(DIRFILE *restrict, struct gd_rename_data_ *restrict);
1449+
int gd_openat_wrapper(const DIRFILE *D, int dirfd, const char *name, int flags,
1450+
mode_t mode);
1451+
int gd_zip_read_file(const DIRFILE *D, int dirfd, const char *name, FILE **fp);
14371452
struct gd_rename_data_ *_GD_PrepareRename(DIRFILE *restrict, char *restrict,
14381453
size_t, gd_entry_t *restrict, int, unsigned);
14391454
int _GD_ReadLinterpFile(DIRFILE *restrict, gd_entry_t *restrict);

src/iopos.c

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,5 +415,112 @@ off_t gd_seek(DIRFILE *D, const char *field_code, off_t frame_num,
415415
{
416416
return (off_t)gd_seek64(D, field_code, frame_num, sample_num, whence);
417417
}
418+
419+
/* Open NAME relative to DIRFD, with read-only support for zipped Dirfiles.
 *
 * When D has no Zip archive attached (D->zzip_dir is NULL), or when GetData
 * was built without zziplib, the call is forwarded unchanged to gd_OpenAt()
 * and its result is returned.
 *
 * Otherwise the archive member path is formed from the part of the directory
 * path reported by _GD_DirName() that follows its common prefix with
 * D->dir[0].path, joined to NAME with a '/'.  If the member exists and has
 * method 0, DIRFD is positioned at the member's data offset and a dup() of
 * DIRFD is returned; the duplicate shares its file offset with DIRFD.
 * NOTE(review): this presumes DIRFD refers to the archive file itself and
 * that _GD_DirName() never returns NULL here -- confirm against the callers.
 *
 * Returns a file descriptor on success.  In the Zip case, failure returns -1
 * with errno set: EACCES when the member is missing or not stored with
 * method 0, ENOMEM when the member path cannot be allocated, or whatever
 * lseek64()/dup() reported.
 */
int gd_openat_wrapper(const DIRFILE *D, int dirfd, const char *name, int flags,
    mode_t mode)
{
  int ret;
#ifdef HAVE_ZZIP_LIB_H
  const char *dir, *p1, *p2, *p;
  ZZIP_FILE *zzip_file;
#endif

  dtrace("%p, %i, \"%s\", 0x%X, 0%o", D, dirfd, name, flags, mode);

#ifdef HAVE_ZZIP_LIB_H
  if (!D->zzip_dir) {
#endif
    ret = gd_OpenAt(D, dirfd, name, flags, mode);
#ifdef HAVE_ZZIP_LIB_H
  } else {
    dir = _GD_DirName(D, dirfd);

    /* find where zip file and desired file's absolute paths differ */
    for (p1 = dir, p2 = D->dir[0].path; *p1 && *p1 == *p2; p1++, p2++)
      ;
    p = (*p1 && *(p1 + 1)) ? p1 + 1 : NULL;

    if (p) {
      /* not the root of the zip: build "<relative dir>/<name>" */
      const size_t plen = strlen(p);
      char *newp = malloc(plen + 1 + strlen(name) + 1);

      if (newp == NULL) {
        errno = ENOMEM;
        dreturn("%i", -1);
        return -1;
      }
      memcpy(newp, p, plen);
      newp[plen] = '/';
      strcpy(newp + plen + 1, name);
      zzip_file = zzip_file_open(D->zzip_dir, newp, flags | O_BINARY);
      free(newp);
    } else {
      /* root of zip */
      zzip_file = zzip_file_open(D->zzip_dir, name, flags | O_BINARY);
    }

    if (zzip_file == NULL) {
      /* report failure the way open(2) does: callers test for a negative
       * descriptor, so a positive errno value must never be returned */
      errno = EACCES;
      ret = -1;
    } else {
      /* copy what is needed, then always release the handle so that it is
       * not leaked when the member turns out not to be method 0 */
      const int stored = (zzip_file->method == 0);
      const off64_t data_start = (off64_t)zzip_file->dataoffset;

      zzip_file_close(zzip_file);

      if (!stored) {
        errno = EACCES;
        ret = -1;
      } else if (lseek64(dirfd, data_start, SEEK_SET) == (off64_t)-1) {
        ret = -1; /* errno set by lseek64 */
      } else {
        ret = dup(dirfd);
      }
    }
  }
#endif

  dreturn("%i", ret);
  return ret;
}
467+
468+
/* Load a member of a zipped Dirfile into an in-memory stdio stream.
 *
 * The archive member path is formed from the part of the directory path
 * reported by _GD_DirName() that follows its common prefix with
 * D->dir[0].path (when D->ndir > 0), joined to NAME with a '/'; otherwise
 * NAME is looked up at the root of the archive.  The member is read in full
 * through zzip_file_read() and copied, followed by one terminating NUL byte,
 * into a stream created with fmemopen().  The stream is rewound before being
 * handed back.
 *
 * On success returns 1 and stores the stream in *fp; the caller owns it and
 * must fclose() it.  On any failure (member not found, allocation failure,
 * read error or short read, stream creation or write failure) returns 0,
 * leaves *fp untouched and releases every intermediate resource.  Always
 * returns 0 when GetData is built without zziplib.
 */
int gd_zip_read_file(const DIRFILE *D, int dirfd, const char *name, FILE **fp)
{
#ifdef HAVE_ZZIP_LIB_H
  int ret = 0;
  const char *dir, *p1, *p2, *p;
  ZZIP_FILE *zzip_file;
  char *contents = NULL;
  FILE *stream = NULL;
  size_t size, got;
#endif

  dtrace("%p, %i, \"%s\", %p", D, dirfd, name, fp);

#ifdef HAVE_ZZIP_LIB_H
  dir = _GD_DirName(D, dirfd);

  /* find where zip file and desired file's absolute paths differ */
  if (D->ndir > 0) {
    for (p1 = dir, p2 = D->dir[0].path; *p1 && *p1 == *p2; p1++, p2++)
      ;
    p = (*p1 && *(p1 + 1)) ? p1 + 1 : NULL;
  } else {
    p = NULL;
  }

  if (p) {
    /* not the root of the zip: build "<relative dir>/<name>" */
    const size_t plen = strlen(p);
    char *newp = malloc(plen + 1 + strlen(name) + 1);

    if (newp == NULL) {
      dreturn("%i", 0);
      return 0;
    }
    memcpy(newp, p, plen);
    newp[plen] = '/';
    strcpy(newp + plen + 1, name);
    zzip_file = zzip_file_open(D->zzip_dir, newp, O_RDONLY | O_BINARY);
    free(newp);
  } else {
    /* root of zip */
    zzip_file = zzip_file_open(D->zzip_dir, name, O_RDONLY | O_BINARY);
  }

  if (zzip_file == NULL) {
    dreturn("%i", 0);
    return 0;
  }

  size = (size_t)zzip_file->usize;
  contents = malloc(size + 1);
  if (contents == NULL)
    goto done;

  /* Read the whole member.  A non-positive return before SIZE bytes have
   * arrived is an error or a premature end of data; an empty member simply
   * skips the loop. */
  for (got = 0; got < size; ) {
    const long n = (long)zzip_file_read(zzip_file, contents + got, size - got);

    if (n <= 0)
      goto done;
    got += (size_t)n;
  }
  contents[size] = 0;

  /* The terminating NUL is written into the stream along with the data, so
   * the stream holds size + 1 bytes. */
  stream = fmemopen(NULL, size + 1, "r+");
  if (stream == NULL)
    goto done;

  if (fwrite(contents, 1, size + 1, stream) != size + 1 ||
      fflush(stream) != 0)
  {
    fclose(stream);
    goto done;
  }
  rewind(stream);

  *fp = stream;
  ret = 1;

done:
  free(contents);
  zzip_file_close(zzip_file);

  dreturn("%i", ret);
  return ret;
#else
  dreturn("%i", 0);
  return 0;
#endif
}
418525
/* vim: ts=2 sw=2 et tw=80
419526
*/

0 commit comments

Comments
 (0)