diff --git a/internal/archive/archive.go b/internal/archive/archive.go
index e24a9c24..19378669 100644
--- a/internal/archive/archive.go
+++ b/internal/archive/archive.go
@@ -64,10 +64,13 @@ func Open(options *Options) (Archive, error) {
 type fetchFlags uint
 
 const (
-	fetchBulk    fetchFlags = 1 << iota
+	fetchBulk fetchFlags = 1 << iota
+	fetchGzip
 	fetchDefault fetchFlags = 0
 )
 
+var notFoundErr = fmt.Errorf("cannot find archive data")
+
 var httpClient = &http.Client{
 	Timeout: 30 * time.Second,
 }
@@ -334,9 +337,32 @@ func (index *ubuntuIndex) fetchIndex() error {
 	}
 
 	logf("Fetching index for %s %s %s %s component...", index.displayName(), index.version, index.suite, index.component)
-	reader, err := index.fetch(index.distPath(packagesPath+".gz"), digest, fetchBulk)
-	if err != nil {
-		return err
+
+	// Prefer acquire-by-hash when the release advertises "Acquire-By-Hash: yes".
+	// The by-hash URL embeds the digest looked up for Packages.gz, so it is
+	// immune to the inconsistent view of InRelease vs Packages.gz that mirrors
+	// and CDNs can serve mid-publication. See https://wiki.ubuntu.com/AptByHash.
+	gzPath := packagesPath + ".gz"
+	var reader io.ReadSeekCloser
+	if index.release.Get("Acquire-By-Hash") == "yes" {
+		gzDigest, _, _ := control.ParsePathInfo(digests, gzPath)
+		if gzDigest != "" {
+			byHashPath := fmt.Sprintf("%s/binary-%s/by-hash/SHA256/%s", index.component, index.arch, gzDigest)
+			r, err := index.fetch(index.distPath(byHashPath), digest, fetchBulk|fetchGzip)
+			if err != nil && err != notFoundErr {
+				return err
+			}
+			// On 404 r is nil, so reader stays nil and the named-path fetch below
+			// runs: the hash may have been garbage-collected on the mirror.
+			reader = r
+		}
+	}
+	if reader == nil {
+		r, err := index.fetch(index.distPath(gzPath), digest, fetchBulk|fetchGzip)
+		if err != nil {
+			return err
+		}
+		reader = r
 	}
 	ctrl, err := control.ParseReader("Package", reader)
 	if err != nil {
@@ -410,13 +436,13 @@ func (index *ubuntuIndex) fetch(path, digest string, flags fetchFlags) (io.ReadS
 	case 401:
 		return nil, fmt.Errorf("cannot fetch from %q: unauthorized", index.label)
 	case 404:
-		return nil, fmt.Errorf("cannot find archive data")
+		return nil, notFoundErr
 	default:
 		return nil, fmt.Errorf("error from archive: %v", resp.Status)
 	}
 
 	body := resp.Body
-	if strings.HasSuffix(path, ".gz") {
+	if flags&fetchGzip != 0 {
 		reader, err := gzip.NewReader(body)
 		if err != nil {
 			return nil, fmt.Errorf("cannot decompress data: %v", err)
diff --git a/internal/archive/archive_test.go b/internal/archive/archive_test.go
index 8ca840e5..8d4e7e61 100644
--- a/internal/archive/archive_test.go
+++ b/internal/archive/archive_test.go
@@ -4,6 +4,8 @@ import (
 	"golang.org/x/crypto/openpgp/packet"
 	. "gopkg.in/check.v1"
 
+	"bytes"
+	"compress/gzip"
 	"debug/elf"
 	"errors"
 	"flag"
@@ -83,14 +85,18 @@ func (s *httpSuite) Do(req *http.Request) (*http.Response, error) {
 	s.request = req
 	s.requests = append(s.requests, req)
 	body := s.response
+	status := s.status
 	s.logf("Request: %s", req.URL.String())
 	if response, ok := s.responses[path.Clean(req.URL.Path)]; ok {
 		body = string(response)
+	} else if len(s.responses) > 0 && s.status == 200 {
+		// Responses are populated but none matches this path: serve 404 like a real archive (only overrides a 200 status).
+		status = 404
 	}
 	rsp := &http.Response{
 		Body:       io.NopCloser(strings.NewReader(body)),
 		Header:     s.header,
-		StatusCode: s.status,
+		StatusCode: status,
 	}
 	return rsp, s.err
 }
@@ -625,6 +631,97 @@ func read(r io.Reader) string {
 	return string(data)
 }
 
+func gzipBytes(s string) []byte {
+	var buf bytes.Buffer
+	w := gzip.NewWriter(&buf)
+	if _, err := w.Write([]byte(s)); err != nil {
+		panic(err)
+	}
+	if err := w.Close(); err != nil {
+		panic(err)
+	}
+	return buf.Bytes()
+}
+
+func (s *httpSuite) sawByHashRequest() bool {
+	for _, req := range s.requests {
+		if strings.Contains(req.URL.Path, "/by-hash/SHA256/") {
+			return true
+		}
+	}
+	return false
+}
+
+func (s *httpSuite) TestFetchByHashSucceedsWhenNamedPathIsStale(c *C) {
+	s.prepareArchiveAdjustRelease("jammy", "22.04", "amd64", []string{"main"}, func(r *testarchive.Release) {
+		r.AcquireByHash = true
+		r.NamedPathContent = map[string][]byte{
+			"main/binary-amd64/Packages.gz": gzipBytes("stale Packages from previous publication"),
+		}
+	})
+
+	options := archive.Options{
+		Label:      "ubuntu",
+		Version:    "22.04",
+		Arch:       "amd64",
+		Suites:     []string{"jammy"},
+		Components: []string{"main"},
+		CacheDir:   c.MkDir(),
+		PubKeys:    []*packet.PublicKey{s.pubKey},
+	}
+
+	testArchive, err := archive.Open(&options)
+	c.Assert(err, IsNil)
+
+	pkg, _, err := testArchive.Fetch("mypkg1")
+	c.Assert(err, IsNil)
+	c.Assert(read(pkg), Equals, "mypkg1 1.1 data")
+	c.Assert(s.sawByHashRequest(), Equals, true)
+}
+
+func (s *httpSuite) TestFetchByHashFallsBackOnNotFound(c *C) {
+	s.prepareArchiveAdjustRelease("jammy", "22.04", "amd64", []string{"main"}, func(r *testarchive.Release) {
+		r.AcquireByHash = true
+		r.ByHashSkip = []string{"main/binary-amd64/Packages.gz"}
+	})
+
+	options := archive.Options{
+		Label:      "ubuntu",
+		Version:    "22.04",
+		Arch:       "amd64",
+		Suites:     []string{"jammy"},
+		Components: []string{"main"},
+		CacheDir:   c.MkDir(),
+		PubKeys:    []*packet.PublicKey{s.pubKey},
+	}
+
+	testArchive, err := archive.Open(&options)
+	c.Assert(err, IsNil)
+
+	pkg, _, err := testArchive.Fetch("mypkg1")
+	c.Assert(err, IsNil)
+	c.Assert(read(pkg), Equals, "mypkg1 1.1 data")
+	c.Assert(s.sawByHashRequest(), Equals, true)
+}
+
+func (s *httpSuite) TestFetchSkipsByHashWhenNotAdvertised(c *C) {
+	s.prepareArchive("jammy", "22.04", "amd64", []string{"main"})
+
+	options := archive.Options{
+		Label:      "ubuntu",
+		Version:    "22.04",
+		Arch:       "amd64",
+		Suites:     []string{"jammy"},
+		Components: []string{"main"},
+		CacheDir:   c.MkDir(),
+		PubKeys:    []*packet.PublicKey{s.pubKey},
+	}
+
+	_, err := archive.Open(&options)
+	c.Assert(err, IsNil)
+	c.Assert(s.sawByHashRequest(), Equals, false)
+}
+
 // ----------------------------------------------------------------------------------------
 // Real archive tests, only enabled via:
 // 1. --real-archive for non-Pro archives (e.g. standard jammy archive),
diff --git a/internal/archive/testarchive/testarchive.go b/internal/archive/testarchive/testarchive.go
index ea5b8e08..02b6dd3e 100644
--- a/internal/archive/testarchive/testarchive.go
+++ b/internal/archive/testarchive/testarchive.go
@@ -107,6 +107,10 @@ type Release struct {
 	Label   string
 	Items   []Item
 	PrivKey *packet.PrivateKey
+	// Test-only knobs: advertise/render acquire-by-hash, omit by-hash copies for listed paths, override named-path content.
+	AcquireByHash    bool
+	ByHashSkip       []string
+	NamedPathContent map[string][]byte
 }
 
 func (r *Release) Walk(f func(Item) error) error {
@@ -127,6 +131,10 @@ func (r *Release) Content() []byte {
 		content := item.Content()
 		fmt.Fprintf(&digests, " %s %d %s\n", makeSha256(content), len(content), item.Path())
 	}
+	acquireByHash := ""
+	if r.AcquireByHash {
+		acquireByHash = "Acquire-By-Hash: yes\n"
+	}
 	content := fmt.Sprintf(string(testutil.Reindent(`
 		Origin: Ubuntu
 		Label: %s
@@ -137,9 +145,9 @@ func (r *Release) Content() []byte {
 		Architectures: amd64 arm64 armhf i386 ppc64el riscv64 s390x
 		Components: main restricted universe multiverse
 		Description: Ubuntu %s
-		SHA256:
+		%sSHA256:
 		%s
-	`)), r.Label, r.Suite, r.Version, r.Version, digests.String())
+	`)), r.Label, r.Suite, r.Version, r.Version, acquireByHash, digests.String())
 
 	var buf bytes.Buffer
 	writer, err := clearsign.Encode(&buf, r.PrivKey, nil)
@@ -158,14 +166,27 @@ func (r *Release) Content() []byte {
 }
 
 func (r *Release) Render(prefix string, content map[string][]byte) error {
+	skipByHash := make(map[string]bool, len(r.ByHashSkip))
+	for _, p := range r.ByHashSkip {
+		skipByHash[p] = true
+	}
 	return r.Walk(func(item Item) error {
 		itemPath := item.Path()
+		itemContent := item.Content()
 		if strings.HasPrefix(itemPath, "pool/") {
-			itemPath = path.Join(prefix, itemPath)
+			content[path.Join(prefix, itemPath)] = itemContent
+			return nil
+		}
+		distItemPath := path.Join(prefix, "dists", r.Suite, itemPath)
+		if override, ok := r.NamedPathContent[itemPath]; ok {
+			content[distItemPath] = override
 		} else {
-			itemPath = path.Join(prefix, "dists", r.Suite, itemPath)
+			content[distItemPath] = itemContent
+		}
+		if r.AcquireByHash && itemPath != r.Path() && !skipByHash[itemPath] {
+			byHashPath := path.Join(prefix, "dists", r.Suite, path.Dir(itemPath), "by-hash", "SHA256", makeSha256(itemContent))
+			content[byHashPath] = itemContent
 		}
-		content[itemPath] = item.Content()
 		return nil
 	})
 }