Skip to content

Commit 7280cfb

Browse files
authored
Create .tar.gz alongside current .json.gz in search snapshot bucket. (dart-lang#9327)
1 parent 740d5d2 commit 7280cfb

5 files changed

Lines changed: 111 additions & 24 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ Important changes to data models, configuration, and migrations between each
22
AppEngine version, listed here to ease deployment and troubleshooting.
33

44
## Next Release (replace with git tag when deployed)
5+
* Note: Started writing search snapshots in `.tar.gz` files.
56

67
## `20260327t133800-all`
78
* Image proxy is now disabled for all users.

app/lib/search/backend.dart

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -498,10 +498,6 @@ class SearchBackend {
498498
});
499499
return bytes!;
500500
}
501-
502-
Future<void> close() async {
503-
_snapshotStorage.close();
504-
}
505501
}
506502

507503
/// Returns a new search form that may override predicates to their canonical forms.

app/lib/service/services.dart

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,6 @@ Future<R> _withPubServices<R>(FutureOr<R> Function() fn) async {
305305
),
306306
),
307307
);
308-
registerScopeExitCallback(searchBackend.close);
309308
registerSearchClient(SearchClient());
310309
registerScopeExitCallback(searchClient.close);
311310
registerSearchAdapter(SearchAdapter());

app/lib/shared/storage.dart

Lines changed: 79 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import 'package:path/path.dart' as p;
1717
import 'package:pool/pool.dart';
1818
import 'package:pub_dev/shared/env_config.dart';
1919
import 'package:retry/retry.dart';
20+
import 'package:tar/tar.dart';
2021

2122
import 'configuration.dart';
2223
import 'utils.dart'
@@ -381,8 +382,8 @@ Future uploadBytesWithRetry(
381382
class VersionedJsonStorage {
382383
final Bucket _bucket;
383384
final String _prefix;
384-
final String _extension = '.json.gz';
385-
Timer? _oldGcTimer;
385+
final String _jsonGzExtension = '.json.gz';
386+
final String _tarGzExtension = '.tar.gz';
386387

387388
VersionedJsonStorage(Bucket bucket, String prefix)
388389
: _bucket = bucket,
@@ -394,12 +395,33 @@ class VersionedJsonStorage {
394395

395396
/// Upload the current data to the storage bucket.
396397
Future<void> uploadDataAsJsonMap(Map<String, dynamic> map) async {
397-
final objectName = _objectName();
398-
final bytes = _gzip.encode(jsonUtf8Encoder.convert(map));
398+
final jsonGzObjectName = _jsonGzObjectName();
399399
try {
400-
await uploadBytesWithRetry(_bucket, objectName, bytes);
400+
final bytes = _gzip.encode(jsonUtf8Encoder.convert(map));
401+
await uploadBytesWithRetry(_bucket, jsonGzObjectName, bytes);
401402
} catch (e, st) {
402-
_logger.warning('Unable to upload data file: $objectName', e, st);
403+
_logger.warning('Unable to upload data file: $jsonGzObjectName', e, st);
404+
}
405+
406+
// also upload as .tar.gz
407+
final tarGzObjectName = _tarGzObjectName();
408+
try {
409+
final contentBytes = jsonUtf8Encoder.convert(map);
410+
final stream = Stream<TarEntry>.fromIterable([
411+
TarEntry(
412+
TarHeader(
413+
name: 'snapshot.json',
414+
size: contentBytes.length,
415+
mode: 420, // 644₈
416+
),
417+
Stream.fromIterable([contentBytes]),
418+
),
419+
]).transform(tarWriter).transform(gzip.encoder);
420+
421+
final bytes = await readByteStream(stream);
422+
await uploadBytesWithRetry(_bucket, tarGzObjectName, bytes);
423+
} catch (e, st) {
424+
_logger.warning('Unable to upload data file: $tarGzObjectName', e, st);
403425
}
404426
}
405427

@@ -409,7 +431,7 @@ class VersionedJsonStorage {
409431
if (version == null) {
410432
return null;
411433
}
412-
final objectName = _objectName(version);
434+
final objectName = _jsonGzObjectName(version);
413435
_logger.info('Loading snapshot: $objectName');
414436
final map = await _bucket.readWithRetry(
415437
objectName,
@@ -422,25 +444,57 @@ class VersionedJsonStorage {
422444
return map as Map<String, dynamic>;
423445
}
424446

447+
/// Gets the snapshot content of the tar.gz file decoded as JSON Map.
448+
Future<Map<String, dynamic>?> getContentAsJsonMapFromTarGz([
449+
String? version,
450+
]) async {
451+
version ??= await _detectLatestVersion();
452+
if (version == null) {
453+
return null;
454+
}
455+
final objectName = _tarGzObjectName(version);
456+
_logger.info('Loading snapshot: $objectName');
457+
final map = await _bucket.readWithRetry(objectName, (input) async {
458+
final archive = TarReader(input.transform(gzip.decoder));
459+
List<int>? bytes;
460+
while (await archive.moveNext()) {
461+
final content = await readByteStream(archive.current.contents);
462+
if (archive.current.name == 'snapshot.json') {
463+
bytes = content;
464+
break;
465+
}
466+
}
467+
if (bytes == null) {
468+
return null;
469+
}
470+
return Stream.fromIterable([
471+
bytes,
472+
]).transform(utf8.decoder).transform(json.decoder).single;
473+
});
474+
return map as Map<String, dynamic>;
475+
}
476+
425477
/// Returns the latest version of the data file matching the current version
426478
/// or created earlier.
427479
Future<String?> _detectLatestVersion() async {
428480
// checking accepted runtimes first
429481
for (final version in versions.acceptedRuntimeVersions) {
430-
final info = await _bucket.tryInfo(_objectName(version));
482+
final info = await _bucket.tryInfo(_jsonGzObjectName(version));
431483
if (info != null) {
432484
return version;
433485
}
434486
}
435487
// fallback to earlier runtimes
436-
final currentPath = _objectName();
488+
final currentPath = _jsonGzObjectName();
437489
final list = (await _bucket.listAllItemsWithRetry(prefix: _prefix))
438490
.map((entry) => entry.name)
439-
.where((name) => name.endsWith(_extension))
491+
.where((name) => name.endsWith(_jsonGzExtension))
440492
.where((name) => name.compareTo(currentPath) <= 0)
441493
.map(
442-
(name) =>
443-
name.substring(_prefix.length, name.length - _extension.length),
494+
(name) => name.substring(
495+
_prefix.length,
496+
name.length - _jsonGzExtension.length,
497+
),
444498
)
445499
.where((version) => versions.runtimeVersionPattern.hasMatch(version))
446500
.toList();
@@ -466,10 +520,16 @@ class VersionedJsonStorage {
466520
return;
467521
}
468522
final name = p.basename(entry.name);
469-
if (!name.endsWith(_extension)) {
523+
String? version;
524+
if (name.endsWith(_jsonGzExtension)) {
525+
version = name.substring(0, name.length - _jsonGzExtension.length);
526+
}
527+
if (name.endsWith(_tarGzExtension)) {
528+
version = name.substring(0, name.length - _tarGzExtension.length);
529+
}
530+
if (version == null) {
470531
return;
471532
}
472-
final version = name.substring(0, name.length - _extension.length);
473533
final matchesPattern =
474534
version.length == 10 &&
475535
versions.runtimeVersionPattern.hasMatch(version);
@@ -489,14 +549,14 @@ class VersionedJsonStorage {
489549
return DeleteCounts(found, deleted);
490550
}
491551

492-
String _objectName([String? version]) {
552+
String _jsonGzObjectName([String? version]) {
493553
version ??= versions.runtimeVersion;
494-
return '$_prefix$version$_extension';
554+
return '$_prefix$version$_jsonGzExtension';
495555
}
496556

497-
void close() {
498-
_oldGcTimer?.cancel();
499-
_oldGcTimer = null;
557+
String _tarGzObjectName([String? version]) {
558+
version ??= versions.runtimeVersion;
559+
return '$_prefix$version$_tarGzExtension';
500560
}
501561
}
502562

app/test/search/snapshot_test.dart

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// Copyright (c) 2026, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
import 'package:fake_gcloud/mem_storage.dart';
6+
import 'package:pub_dev/shared/storage.dart';
7+
import 'package:pub_dev/shared/versions.dart';
8+
import 'package:test/test.dart';
9+
10+
import '../shared/utils.dart';
11+
12+
void main() {
13+
group('snapshot upload and access', () {
14+
scopedTest('Both files are written', () async {
15+
final bucket = MemStorage(buckets: ['test']).bucket('test');
16+
final storage = VersionedJsonStorage(bucket, 'test/');
17+
await storage.uploadDataAsJsonMap({'data': 1});
18+
final list = await bucket.list(prefix: 'test/').toList();
19+
expect(list.map((l) => l.name).toSet(), {
20+
'test/$runtimeVersion.json.gz',
21+
'test/$runtimeVersion.tar.gz',
22+
});
23+
24+
final info = await bucket.info('test/$runtimeVersion.tar.gz');
25+
expect(info.length, 103);
26+
27+
expect(await storage.getContentAsJsonMap(), {'data': 1});
28+
expect(await storage.getContentAsJsonMapFromTarGz(), {'data': 1});
29+
});
30+
});
31+
}

0 commit comments

Comments
 (0)