Skip to content

Commit 3a1204d

Browse files
committed
Merge branch 'lp/repack-propagate-promisor-debugging-info' into seen
When fetching objects into a lazily cloned repository, .promisor files are created with information meant to help debugging. "git repack" has been taught to carry this information forward to packfiles that are newly created. Comments? * lp/repack-propagate-promisor-debugging-info: repack-promisor: add missing headers t7703: test for promisor file content after geometric repack t7700: test for promisor file content after repack repack-promisor: preserve content of promisor files after repack repack-promisor add helper to fill promisor file after repack pack-write: add explanation to promisor file content
2 parents 97aef8f + d1e9254 commit 3a1204d

5 files changed

Lines changed: 280 additions & 21 deletions

File tree

Documentation/git-repack.adoc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ other objects in that pack they already have locally.
4545
+
4646
Promisor packfiles are repacked separately: if there are packfiles that
4747
have an associated ".promisor" file, these packfiles will be repacked
48-
into another separate pack, and an empty ".promisor" file corresponding
49-
to the new separate pack will be written.
48+
into another separate pack, and a ".promisor" file corresponding to the
49+
new separate pack will be written (with arbitrary contents).
5050

5151
-A::
5252
Same as `-a`, unless `-d` is used. Then any unreachable

pack-write.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,15 @@ void write_promisor_file(const char *promisor_name, struct ref **sought, int nr_
603603
int i, err;
604604
FILE *output = xfopen(promisor_name, "w");
605605

606+
/*
607+
* Write in the .promisor file the ref names and associated hashes,
608+
* obtained by fetch-pack, at the point of generation of the
609+
* corresponding packfile. These pieces of info are only used to make
610+
* it easier to debug issues with partial clones, as we can identify
611+
* what refs (and their associated hashes) were fetched at the time
612+
* the packfile was downloaded, and if necessary, compare those hashes
613+
* against what the promisor remote reports now.
614+
*/
606615
for (i = 0; i < nr_sought; i++)
607616
fprintf(output, "%s %s\n", oid_to_hex(&sought[i]->old_oid),
608617
sought[i]->name);

repack-promisor.c

Lines changed: 175 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,18 @@
11
#include "git-compat-util.h"
22
#include "repack.h"
3+
#include "hash.h"
34
#include "hex.h"
5+
#include "odb.h"
46
#include "pack.h"
57
#include "packfile.h"
68
#include "path.h"
9+
#include "refs.h"
710
#include "repository.h"
811
#include "run-command.h"
12+
#include "strbuf.h"
13+
#include "string-list.h"
14+
#include "strmap.h"
15+
#include "strvec.h"
916

1017
struct write_oid_context {
1118
struct child_process *cmd;
@@ -34,10 +41,159 @@ static int write_oid(const struct object_id *oid,
3441
return 0;
3542
}
3643

44+
/*
45+
* Go through all .promisor files contained in repo (excluding those whose name
46+
* appears in not_repacked_basenames, which acts as a ignorelist), and copies
47+
* their content inside the destination file "<packtmp>-<dest_hex>.promisor".
48+
* Each line of a never repacked .promisor file is: "<oid> <ref>" (as described
49+
* in the write_promisor_file() function).
50+
* After a repack, the copied lines will be: "<oid> <ref> <time>", where <time>
51+
* is the time (in Unix time) at which the .promisor file was last modified.
52+
* Only the lines whose <oid> is present inside "<packtmp>-<dest_hex>.idx" will
53+
* be copied.
54+
* The contents of all .promisor files are assumed to be correctly formed.
55+
*/
56+
static void write_promisor_file_after_repack(struct repository *repo,
57+
const char *dest_hex,
58+
const char *packtmp,
59+
struct strset *not_repacked_basenames)
60+
{
61+
char *dest_promisor_name;
62+
char *dest_idx_name;
63+
FILE *dest;
64+
struct object_id dest_oid;
65+
struct packed_git *dest_pack, *p;
66+
struct strbuf source_promisor_name = STRBUF_INIT;
67+
struct strset seen_lines = STRSET_INIT;
68+
struct strbuf line = STRBUF_INIT;
69+
int err;
70+
71+
/* First of all, let's create and open the .promisor dest file */
72+
dest_promisor_name = mkpathdup("%s-%s.promisor", packtmp, dest_hex);
73+
dest = xfopen(dest_promisor_name, "w");
74+
75+
/*
76+
* Now let's retrieve the destination pack.
77+
* We use parse_pack_index() because dest_hex/packtmp point to the packfile
78+
* that "pack-objects" just created, which is about to become part of this
79+
* repository, but has not yet been finalized.
80+
* If we are here, we know that "pack-objects" did not fail, so
81+
* parse_pack_index() being loose in validation does not pose a problem.
82+
* If an error happens, we simply leave the ".promisor" file empty.
83+
*/
84+
if (get_oid_hex_algop(dest_hex, &dest_oid, repo->hash_algo)) {
85+
warning(_("Promisor file left empty: '%s' not a hash"), dest_hex);
86+
if (fclose(dest))
87+
die(_("Could not close '%s' promisor file"), dest_promisor_name);
88+
free(dest_promisor_name);
89+
return;
90+
}
91+
dest_idx_name = mkpathdup("%s-%s.idx", packtmp, dest_hex);
92+
dest_pack = parse_pack_index(repo, dest_oid.hash, dest_idx_name);
93+
if (!dest_pack) {
94+
warning(_("Promisor file left empty: couldn't open packfile '%s'"), dest_idx_name);
95+
if (fclose(dest))
96+
die(_("Could not close '%s' promisor file"), dest_promisor_name);
97+
free(dest_promisor_name);
98+
free(dest_idx_name);
99+
return;
100+
}
101+
102+
repo_for_each_pack(repo, p) {
103+
FILE *source;
104+
struct stat source_stat;
105+
106+
if (!p->pack_promisor)
107+
continue;
108+
109+
if (not_repacked_basenames &&
110+
strset_contains(not_repacked_basenames, pack_basename(p)))
111+
continue;
112+
113+
strbuf_reset(&source_promisor_name);
114+
strbuf_addstr(&source_promisor_name, p->pack_name);
115+
strbuf_strip_suffix(&source_promisor_name, ".pack");
116+
strbuf_addstr(&source_promisor_name, ".promisor");
117+
118+
if (stat(source_promisor_name.buf, &source_stat))
119+
die(_("File not found: %s"), source_promisor_name.buf);
120+
121+
source = xfopen(source_promisor_name.buf, "r");
122+
123+
while (strbuf_getline(&line, source) != EOF) {
124+
struct string_list line_sections = STRING_LIST_INIT_DUP;
125+
struct object_id oid;
126+
127+
/* Split line into <oid>, <ref> and <time> (if <time> exists).
128+
* Check that it was actually split into 2 or 3 parts. If it was
129+
* not, then it is malformed, so skip it.
130+
*/
131+
string_list_split(&line_sections, line.buf, " ", 3);
132+
if (line_sections.nr != 2 && line_sections.nr != 3) {
133+
string_list_clear(&line_sections, 0);
134+
continue;
135+
}
136+
137+
/* Skip the lines where <oid> is not a sane hexadecimal string */
138+
if (get_oid_hex_algop(line_sections.items[0].string,
139+
&oid, repo->hash_algo)) {
140+
string_list_clear(&line_sections, 0);
141+
continue;
142+
}
143+
/* Ignore the lines where <oid> doesn't appear in the dest_pack */
144+
if (!find_pack_entry_one(&oid, dest_pack)) {
145+
string_list_clear(&line_sections, 0);
146+
continue;
147+
}
148+
149+
/*
150+
* Skip the lines where <ref> does not have the
151+
* correct format for a refname.
152+
*/
153+
printf("%s\n", line_sections.items[1].string);
154+
if (check_refname_format(line_sections.items[1].string,
155+
REFNAME_ALLOW_ONELEVEL)) {
156+
string_list_clear(&line_sections, 0);
157+
continue;
158+
}
159+
160+
/* If <time> doesn't exist, retrieve it and add it to line */
161+
if (line_sections.nr != 3)
162+
strbuf_addf(&line, " %" PRItime,
163+
(timestamp_t)source_stat.st_mtime);
164+
165+
/* If the finalized line is new, append it to dest */
166+
if (strset_add(&seen_lines, line.buf))
167+
fprintf(dest, "%s\n", line.buf);
168+
169+
string_list_clear(&line_sections, 0);
170+
}
171+
172+
err = ferror(source);
173+
err |= fclose(source);
174+
if (err)
175+
die(_("Could not read '%s' promisor file"), source_promisor_name.buf);
176+
}
177+
178+
err = ferror(dest);
179+
err |= fclose(dest);
180+
if (err)
181+
die(_("Could not write '%s' promisor file"), dest_promisor_name);
182+
183+
close_pack_index(dest_pack);
184+
free(dest_pack);
185+
free(dest_promisor_name);
186+
free(dest_idx_name);
187+
strbuf_release(&source_promisor_name);
188+
strbuf_release(&line);
189+
strset_clear(&seen_lines);
190+
}
191+
37192
static void finish_repacking_promisor_objects(struct repository *repo,
38193
struct child_process *cmd,
39194
struct string_list *names,
40-
const char *packtmp)
195+
const char *packtmp,
196+
struct strset *not_repacked_basenames)
41197
{
42198
struct strbuf line = STRBUF_INIT;
43199
FILE *out;
@@ -47,30 +203,23 @@ static void finish_repacking_promisor_objects(struct repository *repo,
47203
out = xfdopen(cmd->out, "r");
48204
while (strbuf_getline_lf(&line, out) != EOF) {
49205
struct string_list_item *item;
50-
char *promisor_name;
51206

52207
if (line.len != repo->hash_algo->hexsz)
53208
die(_("repack: Expecting full hex object ID lines only from pack-objects."));
54209
item = string_list_append(names, line.buf);
55210

56211
/*
57212
* pack-objects creates the .pack and .idx files, but not the
58-
* .promisor file. Create the .promisor file, which is empty.
59-
*
60-
* NEEDSWORK: fetch-pack sometimes generates non-empty
61-
* .promisor files containing the ref names and associated
62-
* hashes at the point of generation of the corresponding
63-
* packfile, but this would not preserve their contents. Maybe
64-
* concatenate the contents of all .promisor files instead of
65-
* just creating a new empty file.
213+
* ".promisor" file. To create the "".promisor" file, we don't use the
214+
* helper function write_promisor_file(), but instead we use the
215+
* specific function write_promisor_file_after_repack(), which creates
216+
* the file and appropriately fills it with the content of the
217+
* ".promisor" files used for the repack.
66218
*/
67-
promisor_name = mkpathdup("%s-%s.promisor", packtmp,
68-
line.buf);
69-
write_promisor_file(promisor_name, NULL, 0);
219+
write_promisor_file_after_repack(repo, line.buf, packtmp,
220+
not_repacked_basenames);
70221

71222
item->util = generated_pack_populate(item->string, packtmp);
72-
73-
free(promisor_name);
74223
}
75224

76225
fclose(out);
@@ -107,7 +256,7 @@ void repack_promisor_objects(struct repository *repo,
107256
return;
108257
}
109258

110-
finish_repacking_promisor_objects(repo, &cmd, names, packtmp);
259+
finish_repacking_promisor_objects(repo, &cmd, names, packtmp, NULL);
111260
}
112261

113262
void pack_geometry_repack_promisors(struct repository *repo,
@@ -118,6 +267,7 @@ void pack_geometry_repack_promisors(struct repository *repo,
118267
{
119268
struct child_process cmd = CHILD_PROCESS_INIT;
120269
FILE *in;
270+
struct strset not_repacked_basenames = STRSET_INIT;
121271

122272
if (!geometry->promisor_split)
123273
return;
@@ -131,9 +281,15 @@ void pack_geometry_repack_promisors(struct repository *repo,
131281
in = xfdopen(cmd.in, "w");
132282
for (size_t i = 0; i < geometry->promisor_split; i++)
133283
fprintf(in, "%s\n", pack_basename(geometry->promisor_pack[i]));
134-
for (size_t i = geometry->promisor_split; i < geometry->promisor_pack_nr; i++)
135-
fprintf(in, "^%s\n", pack_basename(geometry->promisor_pack[i]));
284+
for (size_t i = geometry->promisor_split; i < geometry->promisor_pack_nr; i++) {
285+
const char *name = pack_basename(geometry->promisor_pack[i]);
286+
fprintf(in, "^%s\n", name);
287+
strset_add(&not_repacked_basenames, name);
288+
}
136289
fclose(in);
137290

138-
finish_repacking_promisor_objects(repo, &cmd, names, packtmp);
291+
finish_repacking_promisor_objects(repo, &cmd, names, packtmp,
292+
strset_get_size(&not_repacked_basenames) ? &not_repacked_basenames : NULL);
293+
294+
strset_clear(&not_repacked_basenames);
139295
}

t/t7700-repack.sh

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -904,4 +904,65 @@ test_expect_success 'pending objects are repacked appropriately' '
904904
)
905905
'
906906

907+
test_expect_success 'check one .promisor file content after repack' '
908+
test_when_finished rm -rf prom_test prom_before_repack &&
909+
git init prom_test &&
910+
path=prom_test/.git/objects/pack &&
911+
912+
(
913+
# Create 1 pack
914+
test_commit_bulk -C prom_test 1 &&
915+
916+
# Simulate .promisor file by creating it manually
917+
prom=$(find $path -name "*.pack" | sed "s/.pack$/.promisor/") &&
918+
oid=$(git -C prom_test rev-parse HEAD) &&
919+
echo "$oid ref" >"$prom" &&
920+
921+
# Repack, and check if correct
922+
git -C prom_test repack -a -d -f &&
923+
prom=$(find $path -name "*.promisor") &&
924+
# $prom should contain "$oid ref <time>"
925+
test_grep "$oid ref " "$prom" &&
926+
927+
# Save the current .promisor content, repack, and check if correct
928+
cp "$prom" prom_before_repack &&
929+
git -C prom_test repack -a -d -f &&
930+
prom=$(find $path -name "*.promisor") &&
931+
# $prom should be exactly the same as prom_before_repack
932+
test_cmp prom_before_repack "$prom"
933+
)
934+
'
935+
936+
test_expect_success 'check multiple .promisor file content after repack' '
937+
test_when_finished rm -rf prom_test prom_before_repack &&
938+
git init prom_test &&
939+
path=prom_test/.git/objects/pack &&
940+
941+
(
942+
# Create 2 packs and simulate .promisor files by creating them manually
943+
test_commit_bulk -C prom_test 1 &&
944+
prom=$(find $path -name "*.pack" | sed "s/.pack$/.promisor/") &&
945+
oid1=$(git -C prom_test rev-parse HEAD) &&
946+
echo "$oid1 ref1" >"$prom" &&
947+
test_commit_bulk -C prom_test 1 &&
948+
prom=$(find $path -name "*.pack" | sed "s/.pack$/.promisor/; \|$prom|d") &&
949+
oid2=$(git -C prom_test rev-parse HEAD) &&
950+
echo "$oid2 ref2" >"$prom" &&
951+
952+
# Repack, and check if correct
953+
git -C prom_test repack -a -d -f &&
954+
prom=$(find $path -name "*.promisor") &&
955+
# $prom should contain "$oid1 ref1 <time>" & "$oid2 ref2 <time>"
956+
test_grep "$oid1 ref1 " "$prom" &&
957+
test_grep "$oid2 ref2 " "$prom" &&
958+
959+
# Save the current .promisor content, repack, and check if correct
960+
cp "$prom" prom_before_repack &&
961+
git -C prom_test repack -a -d -f &&
962+
prom=$(find $path -name "*.promisor") &&
963+
# $prom should be exactly the same as prom_before_repack
964+
test_cmp prom_before_repack "$prom"
965+
)
966+
'
967+
907968
test_done

t/t7703-repack-geometric.sh

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,4 +541,37 @@ test_expect_success 'geometric repack works with promisor packs' '
541541
)
542542
'
543543

544+
test_expect_success 'check .promisor file content after geometric repack' '
545+
test_when_finished rm -rf prom_test &&
546+
git init prom_test &&
547+
path=prom_test/.git/objects/pack &&
548+
549+
(
550+
# Create 2 packs with 3 objs each, and manually create .promisor files
551+
test_commit_bulk -C prom_test --start=1 1 && # 3 objects
552+
prom1=$(find $path -name "*.pack" | sed "s/.pack$/.promisor/") &&
553+
oid1=$(git -C prom_test rev-parse HEAD) &&
554+
echo "$oid1 ref1" >"$prom1" &&
555+
test_commit_bulk -C prom_test --start=2 1 && # 3 objects
556+
prom2=$(find $path -name "*.pack" | sed "s/.pack$/.promisor/; \|$prom1|d") &&
557+
oid2=$(git -C prom_test rev-parse HEAD) &&
558+
echo "$oid2 ref2" >"$prom2" &&
559+
560+
# Create 1 pack with 12 objs, and manually create .promisor file
561+
test_commit_bulk -C prom_test --start=3 4 && # 12 objects
562+
prom3=$(find $path -name "*.pack" | sed "s/.pack$/.promisor/; \|$prom1|d; \|$prom2|d") &&
563+
oid3=$(git -C prom_test rev-parse HEAD) &&
564+
echo "$oid3 ref3" >"$prom3" &&
565+
566+
# Geometric repack, and check if correct
567+
git -C prom_test repack --geometric 2 -d &&
568+
prom=$(find $path -name "*.pack" | sed "s/.pack$/.promisor/; \|$prom3|d") &&
569+
# $prom should have repacked only the first 2 small packs, so it should only
570+
# contain the following: "$oid1 ref1 <time>" & "$oid2 ref2 <time>"
571+
test_grep "$oid1 ref1 " "$prom" &&
572+
test_grep "$oid2 ref2 " "$prom" &&
573+
test_grep ! "$oid3 ref3" "$prom"
574+
)
575+
'
576+
544577
test_done

0 commit comments

Comments
 (0)