From 8e74b37928d63ea641da481cd39a62539ec40581 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Fri, 16 Jan 2026 04:05:16 +0100 Subject: [PATCH] CLI command to repackage file-storage to archive-storage --- ...ed55e2c0e11bf32cf4539d38a43789f5fefe8.json | 34 ++ ...ad34d1f0b679ef9dcc327b2d34b05dda879a3.json | 14 + ...aab586d79afd85e4ff1440a5b62f09885f202.json | 14 + Cargo.lock | 2 + ...ed55e2c0e11bf32cf4539d38a43789f5fefe8.json | 34 ++ ...ad34d1f0b679ef9dcc327b2d34b05dda879a3.json | 14 + ...aab586d79afd85e4ff1440a5b62f09885f202.json | 14 + crates/bin/docs_rs_admin/Cargo.toml | 3 + crates/bin/docs_rs_admin/src/main.rs | 55 ++- crates/bin/docs_rs_admin/src/repackage.rs | 453 ++++++++++++++++++ 10 files changed, 636 insertions(+), 1 deletion(-) create mode 100644 .sqlx/query-6d63000cd0dc4998a80279dac1bed55e2c0e11bf32cf4539d38a43789f5fefe8.json create mode 100644 .sqlx/query-ee0536956084eb632cd3c9f8b10ad34d1f0b679ef9dcc327b2d34b05dda879a3.json create mode 100644 .sqlx/query-f058669c648359063c7186d1d51aab586d79afd85e4ff1440a5b62f09885f202.json create mode 100644 crates/bin/docs_rs_admin/.sqlx/query-6d63000cd0dc4998a80279dac1bed55e2c0e11bf32cf4539d38a43789f5fefe8.json create mode 100644 crates/bin/docs_rs_admin/.sqlx/query-ee0536956084eb632cd3c9f8b10ad34d1f0b679ef9dcc327b2d34b05dda879a3.json create mode 100644 crates/bin/docs_rs_admin/.sqlx/query-f058669c648359063c7186d1d51aab586d79afd85e4ff1440a5b62f09885f202.json create mode 100644 crates/bin/docs_rs_admin/src/repackage.rs diff --git a/.sqlx/query-6d63000cd0dc4998a80279dac1bed55e2c0e11bf32cf4539d38a43789f5fefe8.json b/.sqlx/query-6d63000cd0dc4998a80279dac1bed55e2c0e11bf32cf4539d38a43789f5fefe8.json new file mode 100644 index 000000000..2e8dd90d1 --- /dev/null +++ b/.sqlx/query-6d63000cd0dc4998a80279dac1bed55e2c0e11bf32cf4539d38a43789f5fefe8.json @@ -0,0 +1,34 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT\n r.id as \"rid: ReleaseId\",\n c.name as \"name: KrateName\",\n r.version as \"version: Version\"\n FROM\n 
crates as c\n INNER JOIN releases as r ON c.id = r.crate_id\n WHERE\n r.archive_storage = FALSE\n ORDER BY r.id\n LIMIT $1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "rid: ReleaseId", + "type_info": "Int4" + }, + { + "ordinal": 1, + "name": "name: KrateName", + "type_info": "Text" + }, + { + "ordinal": 2, + "name": "version: Version", + "type_info": "Text" + } + ], + "parameters": { + "Left": [ + "Int8" + ] + }, + "nullable": [ + false, + false, + false + ] + }, + "hash": "6d63000cd0dc4998a80279dac1bed55e2c0e11bf32cf4539d38a43789f5fefe8" +} diff --git a/.sqlx/query-ee0536956084eb632cd3c9f8b10ad34d1f0b679ef9dcc327b2d34b05dda879a3.json b/.sqlx/query-ee0536956084eb632cd3c9f8b10ad34d1f0b679ef9dcc327b2d34b05dda879a3.json new file mode 100644 index 000000000..a31448679 --- /dev/null +++ b/.sqlx/query-ee0536956084eb632cd3c9f8b10ad34d1f0b679ef9dcc327b2d34b05dda879a3.json @@ -0,0 +1,14 @@ +{ + "db_name": "PostgreSQL", + "query": "DELETE FROM compression_rels WHERE release = $1;", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Int4" + ] + }, + "nullable": [] + }, + "hash": "ee0536956084eb632cd3c9f8b10ad34d1f0b679ef9dcc327b2d34b05dda879a3" +} diff --git a/.sqlx/query-f058669c648359063c7186d1d51aab586d79afd85e4ff1440a5b62f09885f202.json b/.sqlx/query-f058669c648359063c7186d1d51aab586d79afd85e4ff1440a5b62f09885f202.json new file mode 100644 index 000000000..91dfcccf4 --- /dev/null +++ b/.sqlx/query-f058669c648359063c7186d1d51aab586d79afd85e4ff1440a5b62f09885f202.json @@ -0,0 +1,14 @@ +{ + "db_name": "PostgreSQL", + "query": "\n UPDATE releases\n SET archive_storage = TRUE\n WHERE id = $1;\n ", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Int4" + ] + }, + "nullable": [] + }, + "hash": "f058669c648359063c7186d1d51aab586d79afd85e4ff1440a5b62f09885f202" +} diff --git a/Cargo.lock b/Cargo.lock index c595ddc22..a2120809a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1932,6 +1932,8 @@ dependencies = [ "futures-util", 
"pretty_assertions", "sqlx", + "tempfile", + "test-case", "tokio", "tracing", ] diff --git a/crates/bin/docs_rs_admin/.sqlx/query-6d63000cd0dc4998a80279dac1bed55e2c0e11bf32cf4539d38a43789f5fefe8.json b/crates/bin/docs_rs_admin/.sqlx/query-6d63000cd0dc4998a80279dac1bed55e2c0e11bf32cf4539d38a43789f5fefe8.json new file mode 100644 index 000000000..2e8dd90d1 --- /dev/null +++ b/crates/bin/docs_rs_admin/.sqlx/query-6d63000cd0dc4998a80279dac1bed55e2c0e11bf32cf4539d38a43789f5fefe8.json @@ -0,0 +1,34 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT\n r.id as \"rid: ReleaseId\",\n c.name as \"name: KrateName\",\n r.version as \"version: Version\"\n FROM\n crates as c\n INNER JOIN releases as r ON c.id = r.crate_id\n WHERE\n r.archive_storage = FALSE\n ORDER BY r.id\n LIMIT $1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "rid: ReleaseId", + "type_info": "Int4" + }, + { + "ordinal": 1, + "name": "name: KrateName", + "type_info": "Text" + }, + { + "ordinal": 2, + "name": "version: Version", + "type_info": "Text" + } + ], + "parameters": { + "Left": [ + "Int8" + ] + }, + "nullable": [ + false, + false, + false + ] + }, + "hash": "6d63000cd0dc4998a80279dac1bed55e2c0e11bf32cf4539d38a43789f5fefe8" +} diff --git a/crates/bin/docs_rs_admin/.sqlx/query-ee0536956084eb632cd3c9f8b10ad34d1f0b679ef9dcc327b2d34b05dda879a3.json b/crates/bin/docs_rs_admin/.sqlx/query-ee0536956084eb632cd3c9f8b10ad34d1f0b679ef9dcc327b2d34b05dda879a3.json new file mode 100644 index 000000000..a31448679 --- /dev/null +++ b/crates/bin/docs_rs_admin/.sqlx/query-ee0536956084eb632cd3c9f8b10ad34d1f0b679ef9dcc327b2d34b05dda879a3.json @@ -0,0 +1,14 @@ +{ + "db_name": "PostgreSQL", + "query": "DELETE FROM compression_rels WHERE release = $1;", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Int4" + ] + }, + "nullable": [] + }, + "hash": "ee0536956084eb632cd3c9f8b10ad34d1f0b679ef9dcc327b2d34b05dda879a3" +} diff --git 
a/crates/bin/docs_rs_admin/.sqlx/query-f058669c648359063c7186d1d51aab586d79afd85e4ff1440a5b62f09885f202.json b/crates/bin/docs_rs_admin/.sqlx/query-f058669c648359063c7186d1d51aab586d79afd85e4ff1440a5b62f09885f202.json new file mode 100644 index 000000000..91dfcccf4 --- /dev/null +++ b/crates/bin/docs_rs_admin/.sqlx/query-f058669c648359063c7186d1d51aab586d79afd85e4ff1440a5b62f09885f202.json @@ -0,0 +1,14 @@ +{ + "db_name": "PostgreSQL", + "query": "\n UPDATE releases\n SET archive_storage = TRUE\n WHERE id = $1;\n ", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Int4" + ] + }, + "nullable": [] + }, + "hash": "f058669c648359063c7186d1d51aab586d79afd85e4ff1440a5b62f09885f202" +} diff --git a/crates/bin/docs_rs_admin/Cargo.toml b/crates/bin/docs_rs_admin/Cargo.toml index 29629f25d..92c4a29ce 100644 --- a/crates/bin/docs_rs_admin/Cargo.toml +++ b/crates/bin/docs_rs_admin/Cargo.toml @@ -17,10 +17,12 @@ docs_rs_database = { path = "../../lib/docs_rs_database" } docs_rs_fastly = { path = "../../lib/docs_rs_fastly" } docs_rs_headers = { path = "../../lib/docs_rs_headers" } docs_rs_logging = { path = "../../lib/docs_rs_logging" } +docs_rs_storage = { path = "../../lib/docs_rs_storage" } docs_rs_types = { path = "../../lib/docs_rs_types" } docs_rs_utils = { path = "../../lib/docs_rs_utils" } futures-util = { workspace = true } sqlx = { workspace = true } +tempfile = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } @@ -34,3 +36,4 @@ docs_rs_storage = { path = "../../lib/docs_rs_storage", features = ["testing"] } docs_rs_test_fakes = { path = "../../lib/docs_rs_test_fakes" } docs_rs_types = { path = "../../lib/docs_rs_types", features = ["testing"] } pretty_assertions = { workspace = true } +test-case = { workspace = true } diff --git a/crates/bin/docs_rs_admin/src/main.rs b/crates/bin/docs_rs_admin/src/main.rs index 8719d0050..316467817 100644 --- a/crates/bin/docs_rs_admin/src/main.rs +++ 
b/crates/bin/docs_rs_admin/src/main.rs @@ -1,4 +1,5 @@ mod rebuilds; +mod repackage; #[cfg(test)] pub(crate) mod testing; @@ -17,7 +18,7 @@ use docs_rs_database::{ }; use docs_rs_fastly::CdnBehaviour as _; use docs_rs_headers::SurrogateKey; -use docs_rs_types::{CrateId, KrateName, Version}; +use docs_rs_types::{CrateId, KrateName, ReleaseId, Version}; use futures_util::StreamExt; use rebuilds::queue_rebuilds_faulty_rustdoc; use std::iter; @@ -73,6 +74,8 @@ impl CommandLine { .with_meter_provider()? .with_pool() .await? + .with_storage() + .await? .with_build_queue()? .with_repository_stats()? .with_registry_api()? @@ -283,6 +286,14 @@ enum DatabaseSubcommand { version: Option, }, + /// temporary command to repackage missing crates into archive storage. + /// starts at the earliest release and works forwards. + Repackage { + /// process at most this amount of releases + #[arg(long)] + limit: Option, + }, + /// temporary command to update the `crates.latest_version_id` field UpdateLatestVersionId, @@ -320,6 +331,48 @@ impl DatabaseSubcommand { } .context("Failed to run database migrations")?, + Self::Repackage { limit } => { + let pool = ctx.pool()?; + let storage = ctx.storage()?; + let mut list_conn = pool.get_async().await?; + let mut update_conn = pool.get_async().await?; + + let limit = limit.unwrap_or(2_000_000u32); + + let mut stream = sqlx::query!( + r#"SELECT + r.id as "rid: ReleaseId", + c.name as "name: KrateName", + r.version as "version: Version" + FROM + crates as c + INNER JOIN releases as r ON c.id = r.crate_id + WHERE + r.archive_storage = FALSE + ORDER BY r.id + LIMIT $1 + "#, + limit as i64, + ) + .fetch(&mut *list_conn); + + while let Some(row) = stream.next().await { + let row = row?; + + crate::repackage::repackage( + &mut update_conn, + storage, + row.rid, + &row.name, + &row.version, + ) + .await?; + } + + Ok::<(), anyhow::Error>(()) + } + .context("Failed to repackage storage")?, + Self::UpdateLatestVersionId => { let pool = ctx.pool()?; let 
mut list_conn = pool.get_async().await?; diff --git a/crates/bin/docs_rs_admin/src/repackage.rs b/crates/bin/docs_rs_admin/src/repackage.rs new file mode 100644 index 000000000..37b56abe8 --- /dev/null +++ b/crates/bin/docs_rs_admin/src/repackage.rs @@ -0,0 +1,453 @@ +use anyhow::Result; +use docs_rs_storage::{AsyncStorage, FileEntry, rustdoc_archive_path, source_archive_path}; +use docs_rs_types::{CompressionAlgorithm, KrateName, ReleaseId, Version}; +use docs_rs_utils::spawn_blocking; +use futures_util::StreamExt as _; +use sqlx::Acquire as _; +use std::collections::HashSet; +use tokio::{fs, io}; +use tracing::{info, instrument}; + +/// repackage old rustdoc / source content. +/// +/// New releases have been stored as ZIP files for quite some time already; +/// of the current 1.9 million releases, only 363k are old non-archive +/// releases, where we store all the single files on the storage. +/// +/// Since I don't want to rebuild all of these, +/// and I don't even know if stuff that old can be rebuilt with current toolchains, +/// I'll just repackage the old files. +/// +/// So +/// 1. download all files for rustdoc / source from storage +/// 2. create a ZIP archive containing all these files +/// 3. upload the zip +/// 4. update database entries accordingly +/// 5. delete old files +/// +/// When that's done, I can remove all the logic in the codebase related to +/// non-archive storage. 
+///
+/// # Errors
+///
+/// Returns an error when repackaging either prefix fails, when one of the database
+/// statements fails, or when deleting the old files fails. The old files are only
+/// deleted after the database transaction was committed.
+#[instrument(skip_all, fields(rid=%rid, name=%name, version=%version))]
+pub async fn repackage(
+    conn: &mut sqlx::PgConnection,
+    storage: &AsyncStorage,
+    rid: ReleaseId,
+    name: &KrateName,
+    version: &Version,
+) -> Result<()> {
+    info!("repackaging");
+
+    let rustdoc_prefix = format!("rustdoc/{name}/{version}/");
+    let rustdoc_archive_path = rustdoc_archive_path(name, version);
+
+    let sources_prefix = format!("sources/{name}/{version}/");
+    let source_archive_path = source_archive_path(name, version);
+
+    // Build and upload the archives before touching the database: downloading,
+    // zipping and uploading can take a long time and doesn't need a transaction.
+    let mut algs: HashSet<CompressionAlgorithm> = HashSet::new();
+
+    if let Some((_rustdoc_file_list, alg)) =
+        repackage_path(storage, &rustdoc_prefix, &rustdoc_archive_path).await?
+    {
+        algs.insert(alg);
+    }
+
+    if let Some((_source_file_list, alg)) =
+        repackage_path(storage, &sources_prefix, &source_archive_path).await?
+    {
+        algs.insert(alg);
+    }
+
+    // Only now open the transaction, so the connection isn't sitting idle inside an
+    // open transaction while the storage work above is running.
+    let mut transaction = conn.begin().await?;
+
+    let affected = sqlx::query!(
+        r#"
+        UPDATE releases
+        SET archive_storage = TRUE
+        WHERE id = $1;
+        "#,
+        rid as _,
+    )
+    .execute(&mut *transaction)
+    .await?
+    .rows_affected();
+
+    // NOTE: this is only checked in debug builds.
+    debug_assert!(
+        affected > 0,
+        "release not found in database. Can't update archive_storage"
+    );
+
+    // replace the recorded compression algorithms with the ones used by the new archives.
+    sqlx::query!("DELETE FROM compression_rels WHERE release = $1;", rid as _)
+        .execute(&mut *transaction)
+        .await?;
+
+    for alg in algs {
+        sqlx::query!(
+            "INSERT INTO compression_rels (release, algorithm)
+             VALUES ($1, $2)
+             ON CONFLICT DO NOTHING;",
+            rid as _,
+            &(alg as i32)
+        )
+        .execute(&mut *transaction)
+        .await?;
+    }
+
+    transaction.commit().await?;
+
+    // TODO: validate the zip file?
+
+    // only delete the old files when we were able to update database with `archive_storage=true`,
+    // and were able to validate the zip file.
+    storage.delete_prefix(&rustdoc_prefix).await?;
+    storage.delete_prefix(&sources_prefix).await?;
+
+    Ok(())
+}
+
+/// repackage contents of a S3 path prefix into a single archive file.
+///
+/// Not performance optimized, for now it just tries to be simple.
+///
+/// Downloads every object below `prefix` into a temporary directory (keeping the
+/// path relative to `prefix`) and uploads that directory as `target_archive`.
+///
+/// Returns `Ok(None)` when nothing is stored below `prefix`, otherwise the file list
+/// and compression algorithm returned by `store_all_in_archive`.
+#[instrument(skip(storage))]
+async fn repackage_path(
+    storage: &AsyncStorage,
+    prefix: &str,
+    target_archive: &str,
+) -> Result<Option<(Vec<FileEntry>, CompressionAlgorithm)>> {
+    let tempdir = spawn_blocking(|| tempfile::tempdir().map_err(Into::into)).await?;
+
+    let mut files = 0;
+    let mut list = storage.list_prefix(prefix).await;
+    while let Some(entry) = list.next().await {
+        let entry = entry?;
+        let mut stream = storage.get_stream(&entry).await?;
+
+        // Strip the prefix exactly once. `trim_start_matches` would strip it repeatedly
+        // and mangle paths whose remainder starts with the same text again.
+        // A path without the prefix is used as-is.
+        let relative_path = stream.path.strip_prefix(prefix).unwrap_or(&stream.path);
+        let target_path = tempdir.path().join(relative_path);
+
+        let parent = target_path.parent().ok_or_else(|| {
+            anyhow::anyhow!("no parent directory for {}", target_path.display())
+        })?;
+        fs::create_dir_all(parent).await?;
+        {
+            let mut output_file = fs::File::create(&target_path).await?;
+            io::copy(&mut stream.content, &mut output_file).await?;
+            output_file.sync_all().await?;
+        }
+
+        files += 1;
+    }
+
+    // nothing below the prefix, so there is nothing to archive.
+    if files == 0 {
+        return Ok(None);
+    }
+
+    let (file_list, alg) = storage
+        .store_all_in_archive(target_archive, &tempdir.path())
+        .await?;
+
+    fs::remove_dir_all(&tempdir).await?;
+
+    Ok(Some((file_list, alg)))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::testing::TestEnvironment;
+    use docs_rs_storage::{PathNotFoundError, StorageKind, source_archive_path};
+    use docs_rs_types::testing::{KRATE, V1};
+    use pretty_assertions::assert_eq;
+    use test_case::test_case;
+
+    /// list all paths in the storage, ignoring `rustdoc-json/` and `build-logs/`.
+    async fn ls(storage: &AsyncStorage) -> Vec<String> {
+        storage
+            .list_prefix("")
+            .await
+            .filter_map(|path| async {
+                let Ok(path) = path else { return None };
+
+                if path.starts_with("rustdoc-json/") || path.starts_with("build-logs/") {
+                    return None;
+                }
+
+                Some(path.clone())
+            })
+            .collect::<Vec<_>>()
+            .await
+    }
+
+    #[test_case(StorageKind::S3)]
+    #[test_case(StorageKind::Memory)]
+    #[tokio::test(flavor = "multi_thread")]
+    async fn test_repackage_normal(kind: StorageKind) -> Result<()> {
+        let env = TestEnvironment::builder()
+            .storage_config(docs_rs_storage::Config::test_config_with_kind(kind)?)
+            .build()
+            .await?;
+
+        const HTML_PATH: &str = "some/path.html";
+        const HTML_CONTENT: &str = "content";
+        const SOURCE_PATH: &str = "another/source.rs";
+        const SOURCE_CONTENT: &str = "fn main() {}";
+
+        let rid = env
+            .fake_release()
+            .await
+            .name(&KRATE)
+            .archive_storage(false)
+            .rustdoc_file_with(HTML_PATH, HTML_CONTENT.as_bytes())
+            .source_file(SOURCE_PATH, SOURCE_CONTENT.as_bytes())
+            .version(V1)
+            .create()
+            .await?;
+
+        let storage = env.storage()?;
+
+        // confirm we can fetch the files via old file-based storage.
+        assert_eq!(
+            storage
+                .stream_rustdoc_file(&KRATE, &V1, None, HTML_PATH, false)
+                .await?
+                .materialize(usize::MAX)
+                .await?
+                .content,
+            HTML_CONTENT.as_bytes()
+        );
+
+        assert_eq!(
+            storage
+                .stream_source_file(&KRATE, &V1, None, SOURCE_PATH, false)
+                .await?
+                .materialize(usize::MAX)
+                .await?
+                .content,
+            SOURCE_CONTENT.as_bytes()
+        );
+
+        assert_eq!(
+            ls(storage).await,
+            vec![
+                "rustdoc/krate/1.0.0/krate/index.html",
+                "rustdoc/krate/1.0.0/some/path.html",
+                "sources/krate/1.0.0/Cargo.toml",
+                "sources/krate/1.0.0/another/source.rs",
+            ]
+        );
+
+        // confirm the target archives really don't exist
+        for path in &[
+            &rustdoc_archive_path(&KRATE, &V1),
+            &source_archive_path(&KRATE, &V1),
+        ] {
+            assert!(!storage.exists(path).await?);
+        }
+
+        let mut conn = env.async_conn().await?;
+        repackage(&mut conn, storage, rid, &KRATE, &V1).await?;
+
+        // afterwards it works with rustdoc archives.
+        assert_eq!(
+            &storage
+                .stream_rustdoc_file(&KRATE, &V1, None, HTML_PATH, true)
+                .await?
+                .materialize(usize::MAX)
+                .await?
+                .content,
+            HTML_CONTENT.as_bytes(),
+        );
+
+        // also with source archives.
+        assert_eq!(
+            &storage
+                .stream_source_file(&KRATE, &V1, None, SOURCE_PATH, true)
+                .await?
+                .materialize(usize::MAX)
+                .await?
+                .content,
+            SOURCE_CONTENT.as_bytes(),
+        );
+
+        // all new files are these (`.zip`, `.zip.index`), old files are gone.
+        assert_eq!(
+            ls(storage).await,
+            vec![
+                "rustdoc/krate/1.0.0.zip",
+                "rustdoc/krate/1.0.0.zip.index",
+                "sources/krate/1.0.0.zip",
+                "sources/krate/1.0.0.zip.index",
+            ]
+        );
+
+        Ok(())
+    }
+
+    #[tokio::test(flavor = "multi_thread")]
+    async fn test_repackage_without_rustdoc() -> Result<()> {
+        let env = TestEnvironment::builder()
+            .storage_config(docs_rs_storage::Config::test_config_with_kind(
+                StorageKind::S3,
+            )?)
+            .build()
+            .await?;
+
+        const HTML_PATH: &str = "some/path.html";
+        const SOURCE_PATH: &str = "another/source.rs";
+        const SOURCE_CONTENT: &str = "fn main() {}";
+
+        let rid = env
+            .fake_release()
+            .await
+            .name(&KRATE)
+            .archive_storage(false)
+            .rustdoc_file(HTML_PATH) // will be deleted
+            .source_file(SOURCE_PATH, SOURCE_CONTENT.as_bytes())
+            .version(V1)
+            .create()
+            .await?;
+
+        let storage = env.storage()?;
+        storage
+            .delete_prefix(&format!("rustdoc/{KRATE}/{V1}/"))
+            .await?;
+
+        // confirm the rustdoc file is gone from the old file-based storage,
+        // while the source file can still be fetched from it.
+        assert!(
+            !storage
+                .rustdoc_file_exists(&KRATE, &V1, None, HTML_PATH, false)
+                .await?
+        );
+
+        assert_eq!(
+            storage
+                .stream_source_file(&KRATE, &V1, None, SOURCE_PATH, false)
+                .await?
+                .materialize(usize::MAX)
+                .await?
+                .content,
+            SOURCE_CONTENT.as_bytes()
+        );
+
+        assert_eq!(
+            ls(storage).await,
+            vec![
+                "sources/krate/1.0.0/Cargo.toml",
+                "sources/krate/1.0.0/another/source.rs",
+            ]
+        );
+
+        // confirm the target archives really don't exist
+        for path in &[
+            &rustdoc_archive_path(&KRATE, &V1),
+            &source_archive_path(&KRATE, &V1),
+        ] {
+            assert!(!storage.exists(path).await?);
+        }
+
+        let mut conn = env.async_conn().await?;
+        repackage(&mut conn, storage, rid, &KRATE, &V1).await?;
+
+        // no rustdoc archive was created, but the source archive works
+        assert_eq!(
+            &storage
+                .stream_source_file(&KRATE, &V1, None, SOURCE_PATH, true)
+                .await?
+                .materialize(usize::MAX)
+                .await?
+                .content,
+            SOURCE_CONTENT.as_bytes(),
+        );
+
+        // all new files are these (`.zip`, `.zip.index`), old files are gone.
+        assert_eq!(
+            ls(storage).await,
+            vec!["sources/krate/1.0.0.zip", "sources/krate/1.0.0.zip.index",]
+        );
+
+        Ok(())
+    }
+
+    #[tokio::test(flavor = "multi_thread")]
+    async fn test_repackage_without_source() -> Result<()> {
+        let env = TestEnvironment::builder()
+            .storage_config(docs_rs_storage::Config::test_config_with_kind(
+                StorageKind::S3,
+            )?)
+            .build()
+            .await?;
+
+        const HTML_PATH: &str = "some/path.html";
+        const HTML_CONTENT: &str = "content";
+        const SOURCE_PATH: &str = "another/source.rs";
+        const SOURCE_CONTENT: &str = "fn main() {}";
+
+        let rid = env
+            .fake_release()
+            .await
+            .name(&KRATE)
+            .archive_storage(false)
+            .rustdoc_file_with(HTML_PATH, HTML_CONTENT.as_bytes())
+            .source_file(SOURCE_PATH, SOURCE_CONTENT.as_bytes())
+            .version(V1)
+            .create()
+            .await?;
+
+        let storage = env.storage()?;
+        storage
+            .delete_prefix(&format!("sources/{KRATE}/{V1}/"))
+            .await?;
+
+        // confirm we can fetch the files via old file-based storage.
+        assert_eq!(
+            storage
+                .stream_rustdoc_file(&KRATE, &V1, None, HTML_PATH, false)
+                .await?
+                .materialize(usize::MAX)
+                .await?
+                .content,
+            HTML_CONTENT.as_bytes()
+        );
+
+        // source file doesn't exist
+        assert!(
+            storage
+                .stream_source_file(&KRATE, &V1, None, SOURCE_PATH, false)
+                .await
+                .unwrap_err()
+                .is::<PathNotFoundError>()
+        );
+
+        assert_eq!(
+            ls(storage).await,
+            vec![
+                "rustdoc/krate/1.0.0/krate/index.html",
+                "rustdoc/krate/1.0.0/some/path.html",
+            ]
+        );
+
+        // confirm the target archives really don't exist
+        for path in &[
+            &rustdoc_archive_path(&KRATE, &V1),
+            &source_archive_path(&KRATE, &V1),
+        ] {
+            assert!(!storage.exists(path).await?);
+        }
+
+        let mut conn = env.async_conn().await?;
+        repackage(&mut conn, storage, rid, &KRATE, &V1).await?;
+
+        // afterwards it works with rustdoc archives.
+        assert_eq!(
+            &storage
+                .stream_rustdoc_file(&KRATE, &V1, None, HTML_PATH, true)
+                .await?
+                .materialize(usize::MAX)
+                .await?
+                .content,
+            HTML_CONTENT.as_bytes(),
+        );
+
+        // all new files are these (`.zip`, `.zip.index`), old files are gone.
+        assert_eq!(
+            ls(storage).await,
+            vec!["rustdoc/krate/1.0.0.zip", "rustdoc/krate/1.0.0.zip.index",]
+        );
+
+        Ok(())
+    }
+}