From cfde7b24b445224ab88e8317e3176dee13c87313 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Sun, 15 Mar 2026 19:21:17 +0100 Subject: [PATCH 1/4] sitemaps: add "recent" sitemap, use last-build-date instead of release date --- ...69cad6514ef1f56bb633deb7a027484c7d535.json | 34 ++ ...6f2a16cc17c471eb199d557ab6b32656ccef8.json | 15 + ...77e895614ab77d023a8bc2eb333af7f71ab2f.json | 35 ++ ...e51003d90122f83429313966add5b224f5f6c.json | 34 -- ...69cad6514ef1f56bb633deb7a027484c7d535.json | 34 ++ ...77e895614ab77d023a8bc2eb333af7f71ab2f.json | 35 ++ ...e51003d90122f83429313966add5b224f5f6c.json | 34 -- ...69cad6514ef1f56bb633deb7a027484c7d535.json | 34 ++ ...6f2a16cc17c471eb199d557ab6b32656ccef8.json | 15 + ...77e895614ab77d023a8bc2eb333af7f71ab2f.json | 35 ++ ...e51003d90122f83429313966add5b224f5f6c.json | 34 -- .../bin/docs_rs_web/src/handlers/sitemap.rs | 421 ++++++++++++------ crates/bin/docs_rs_web/src/routes.rs | 4 + .../templates/core/sitemap/index.xml | 5 + 14 files changed, 540 insertions(+), 229 deletions(-) create mode 100644 .sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json create mode 100644 .sqlx/query-4a7812c6d849bbe2df34107d81b6f2a16cc17c471eb199d557ab6b32656ccef8.json create mode 100644 .sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json delete mode 100644 .sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json create mode 100644 crates/bin/cratesfyi/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json create mode 100644 crates/bin/cratesfyi/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json delete mode 100644 crates/bin/cratesfyi/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json create mode 100644 crates/bin/docs_rs_web/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json create mode 100644 
crates/bin/docs_rs_web/.sqlx/query-4a7812c6d849bbe2df34107d81b6f2a16cc17c471eb199d557ab6b32656ccef8.json create mode 100644 crates/bin/docs_rs_web/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json delete mode 100644 crates/bin/docs_rs_web/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json diff --git a/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json b/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json new file mode 100644 index 000000000..cc59734f7 --- /dev/null +++ b/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json @@ -0,0 +1,34 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT crates.name,\n releases.target_name,\n release_build_status.last_build_time\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n rustdoc_status = true AND\n crates.name ILIKE $1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "target_name", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "last_build_time", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [ + false, + true, + true + ] + }, + "hash": "33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535" +} diff --git a/.sqlx/query-4a7812c6d849bbe2df34107d81b6f2a16cc17c471eb199d557ab6b32656ccef8.json b/.sqlx/query-4a7812c6d849bbe2df34107d81b6f2a16cc17c471eb199d557ab6b32656ccef8.json new file mode 100644 index 000000000..960489039 --- /dev/null +++ b/.sqlx/query-4a7812c6d849bbe2df34107d81b6f2a16cc17c471eb199d557ab6b32656ccef8.json @@ -0,0 +1,15 @@ +{ + "db_name": "PostgreSQL", + "query": "UPDATE release_build_status\n SET last_build_time = $1\n FROM releases\n INNER JOIN crates ON crates.id = releases.crate_id\n WHERE 
release_build_status.rid = releases.id\n AND crates.name = $2", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Timestamptz", + "Text" + ] + }, + "nullable": [] + }, + "hash": "4a7812c6d849bbe2df34107d81b6f2a16cc17c471eb199d557ab6b32656ccef8" +} diff --git a/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json b/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json new file mode 100644 index 000000000..6c012fc91 --- /dev/null +++ b/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json @@ -0,0 +1,35 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT crates.name,\n releases.target_name,\n release_build_status.last_build_time\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n releases.rustdoc_status = true AND\n release_build_status.last_build_time >= $1 AND\n release_build_status.last_build_time < $2\n ORDER BY release_build_status.last_build_time DESC\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "target_name", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "last_build_time", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Timestamptz", + "Timestamptz" + ] + }, + "nullable": [ + false, + true, + true + ] + }, + "hash": "671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f" +} diff --git a/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json b/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json deleted file mode 100644 index ce7e21cd4..000000000 --- a/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT crates.name,\n releases.target_name,\n 
MAX(releases.release_time) as \"release_time!\"\n FROM crates\n INNER JOIN releases ON releases.crate_id = crates.id\n WHERE\n rustdoc_status = true AND\n crates.name ILIKE $1\n GROUP BY crates.name, releases.target_name\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "name", - "type_info": "Text" - }, - { - "ordinal": 1, - "name": "target_name", - "type_info": "Varchar" - }, - { - "ordinal": 2, - "name": "release_time!", - "type_info": "Timestamptz" - } - ], - "parameters": { - "Left": [ - "Text" - ] - }, - "nullable": [ - false, - true, - null - ] - }, - "hash": "df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c" -} diff --git a/crates/bin/cratesfyi/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json b/crates/bin/cratesfyi/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json new file mode 100644 index 000000000..cc59734f7 --- /dev/null +++ b/crates/bin/cratesfyi/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json @@ -0,0 +1,34 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT crates.name,\n releases.target_name,\n release_build_status.last_build_time\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n rustdoc_status = true AND\n crates.name ILIKE $1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "target_name", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "last_build_time", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [ + false, + true, + true + ] + }, + "hash": "33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535" +} diff --git a/crates/bin/cratesfyi/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json 
b/crates/bin/cratesfyi/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json new file mode 100644 index 000000000..6c012fc91 --- /dev/null +++ b/crates/bin/cratesfyi/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json @@ -0,0 +1,35 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT crates.name,\n releases.target_name,\n release_build_status.last_build_time\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n releases.rustdoc_status = true AND\n release_build_status.last_build_time >= $1 AND\n release_build_status.last_build_time < $2\n ORDER BY release_build_status.last_build_time DESC\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "target_name", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "last_build_time", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Timestamptz", + "Timestamptz" + ] + }, + "nullable": [ + false, + true, + true + ] + }, + "hash": "671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f" +} diff --git a/crates/bin/cratesfyi/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json b/crates/bin/cratesfyi/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json deleted file mode 100644 index ce7e21cd4..000000000 --- a/crates/bin/cratesfyi/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT crates.name,\n releases.target_name,\n MAX(releases.release_time) as \"release_time!\"\n FROM crates\n INNER JOIN releases ON releases.crate_id = crates.id\n WHERE\n rustdoc_status = true AND\n crates.name ILIKE $1\n GROUP BY crates.name, releases.target_name\n ", - "describe": { - "columns": [ - { - 
"ordinal": 0, - "name": "name", - "type_info": "Text" - }, - { - "ordinal": 1, - "name": "target_name", - "type_info": "Varchar" - }, - { - "ordinal": 2, - "name": "release_time!", - "type_info": "Timestamptz" - } - ], - "parameters": { - "Left": [ - "Text" - ] - }, - "nullable": [ - false, - true, - null - ] - }, - "hash": "df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c" -} diff --git a/crates/bin/docs_rs_web/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json b/crates/bin/docs_rs_web/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json new file mode 100644 index 000000000..cc59734f7 --- /dev/null +++ b/crates/bin/docs_rs_web/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json @@ -0,0 +1,34 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT crates.name,\n releases.target_name,\n release_build_status.last_build_time\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n rustdoc_status = true AND\n crates.name ILIKE $1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "target_name", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "last_build_time", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [ + false, + true, + true + ] + }, + "hash": "33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535" +} diff --git a/crates/bin/docs_rs_web/.sqlx/query-4a7812c6d849bbe2df34107d81b6f2a16cc17c471eb199d557ab6b32656ccef8.json b/crates/bin/docs_rs_web/.sqlx/query-4a7812c6d849bbe2df34107d81b6f2a16cc17c471eb199d557ab6b32656ccef8.json new file mode 100644 index 000000000..960489039 --- /dev/null +++ b/crates/bin/docs_rs_web/.sqlx/query-4a7812c6d849bbe2df34107d81b6f2a16cc17c471eb199d557ab6b32656ccef8.json @@ -0,0 
+1,15 @@ +{ + "db_name": "PostgreSQL", + "query": "UPDATE release_build_status\n SET last_build_time = $1\n FROM releases\n INNER JOIN crates ON crates.id = releases.crate_id\n WHERE release_build_status.rid = releases.id\n AND crates.name = $2", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Timestamptz", + "Text" + ] + }, + "nullable": [] + }, + "hash": "4a7812c6d849bbe2df34107d81b6f2a16cc17c471eb199d557ab6b32656ccef8" +} diff --git a/crates/bin/docs_rs_web/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json b/crates/bin/docs_rs_web/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json new file mode 100644 index 000000000..6c012fc91 --- /dev/null +++ b/crates/bin/docs_rs_web/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json @@ -0,0 +1,35 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT crates.name,\n releases.target_name,\n release_build_status.last_build_time\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n releases.rustdoc_status = true AND\n release_build_status.last_build_time >= $1 AND\n release_build_status.last_build_time < $2\n ORDER BY release_build_status.last_build_time DESC\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "target_name", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "last_build_time", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Timestamptz", + "Timestamptz" + ] + }, + "nullable": [ + false, + true, + true + ] + }, + "hash": "671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f" +} diff --git a/crates/bin/docs_rs_web/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json 
b/crates/bin/docs_rs_web/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json deleted file mode 100644 index ce7e21cd4..000000000 --- a/crates/bin/docs_rs_web/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT crates.name,\n releases.target_name,\n MAX(releases.release_time) as \"release_time!\"\n FROM crates\n INNER JOIN releases ON releases.crate_id = crates.id\n WHERE\n rustdoc_status = true AND\n crates.name ILIKE $1\n GROUP BY crates.name, releases.target_name\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "name", - "type_info": "Text" - }, - { - "ordinal": 1, - "name": "target_name", - "type_info": "Varchar" - }, - { - "ordinal": 2, - "name": "release_time!", - "type_info": "Timestamptz" - } - ], - "parameters": { - "Left": [ - "Text" - ] - }, - "nullable": [ - false, - true, - null - ] - }, - "hash": "df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c" -} diff --git a/crates/bin/docs_rs_web/src/handlers/sitemap.rs b/crates/bin/docs_rs_web/src/handlers/sitemap.rs index b77fa3725..9de627642 100644 --- a/crates/bin/docs_rs_web/src/handlers/sitemap.rs +++ b/crates/bin/docs_rs_web/src/handlers/sitemap.rs @@ -12,18 +12,21 @@ use axum::{ response::IntoResponse, }; use axum_extra::{TypedHeader, headers::ContentType}; -use chrono::{TimeZone, Utc}; +use chrono::{Days, NaiveDate, TimeZone, Utc}; use docs_rs_mimes as mimes; -use futures_util::{StreamExt as _, pin_mut}; +use futures_util::{StreamExt as _, pin_mut, stream::BoxStream}; use tracing::{Span, error}; use tracing_futures::Instrument as _; +const RECENT_SITEMAP_DAYS: u64 = 7; + /// sitemap index #[derive(Template)] #[template(path = "core/sitemap/index.xml")] #[derive(Debug, Clone, PartialEq, Eq)] struct SitemapIndexXml { sitemaps: Vec, + recent_sitemaps: Vec, } impl_axum_webpage! { @@ -33,8 +36,21 @@ impl_axum_webpage! 
{ pub(crate) async fn sitemapindex_handler() -> impl IntoResponse { let sitemaps: Vec = ('a'..='z').collect(); + let today = Utc::now().date_naive(); + let recent_sitemaps = (0..RECENT_SITEMAP_DAYS) + .map(|days| { + today + .checked_sub_days(Days::new(days)) + .expect("underflow when building recent sitemap dates") + .format("%F") + .to_string() + }) + .collect(); - SitemapIndexXml { sitemaps } + SitemapIndexXml { + sitemaps, + recent_sitemaps, + } } #[derive(Template)] @@ -51,41 +67,49 @@ const SITEMAP_HEADER: &[u8] = br#" const SITEMAP_FOOTER: &[u8] = b"\n"; -pub(crate) async fn sitemap_handler( - Path(letter): Path, - mut conn: DbConnection, -) -> AxumResult { - if letter.len() != 1 { - return Err(AxumNope::ResourceNotFound); - } else if let Some(ch) = letter.chars().next() - && !(ch.is_ascii_lowercase()) - { - return Err(AxumNope::ResourceNotFound); - } +fn render_sitemap_item(item: SitemapItem) -> AxumResult { + let target_name = item + .target_name + .expect("when we have rustdoc_status=true, this field is filled"); + + let item = (SitemapItemXml { + crate_name: item.name, + target_name, + last_modified: item + .last_build_time + .expect("when we have rustdoc_status=true, this field is filled") + // On Aug 27 2022 we added `` to all pages, + // so they should all get recrawled if they haven't been since then. 
+ .max(Utc.with_ymd_and_hms(2022, 8, 28, 0, 0, 0).unwrap()) + .format("%+") + .to_string(), + }) + .render() + .map_err(|err| AxumNope::InternalError(err.into()))?; - let stream_span = Span::current(); + Ok(Bytes::from(item)) +} + +struct SitemapItem { + name: String, + target_name: Option, + last_build_time: Option>, +} +type SitemapQueryStream<'a> = BoxStream<'a, Result>; + +fn stream_sitemap(mut conn: DbConnection, query: Query) -> impl IntoResponse +where + Query: for<'a> FnOnce(&'a mut DbConnection) -> SitemapQueryStream<'a> + Send + 'static, +{ + let stream_span = Span::current(); let stream = stream!({ let mut items: usize = 0; let mut streamed_bytes: usize = SITEMAP_HEADER.len(); yield Ok(Bytes::from_static(SITEMAP_HEADER)); - let result = sqlx::query!( - r#"SELECT crates.name, - releases.target_name, - MAX(releases.release_time) as "release_time!" - FROM crates - INNER JOIN releases ON releases.crate_id = crates.id - WHERE - rustdoc_status = true AND - crates.name ILIKE $1 - GROUP BY crates.name, releases.target_name - "#, - format!("{letter}%"), - ) - .fetch(&mut *conn); - + let result = query(&mut conn); pin_mut!(result); while let Some(row) = result.next().await { let row = match row { @@ -97,33 +121,18 @@ pub(crate) async fn sitemap_handler( } }; - match (SitemapItemXml { - crate_name: row.name, - target_name: row - .target_name - .expect("when we have rustdoc_status=true, this field is filled"), - last_modified: row - .release_time - // On Aug 27 2022 we added `` to all pages, - // so they should all get recrawled if they haven't been since then. 
- .max(Utc.with_ymd_and_hms(2022, 8, 28, 0, 0, 0).unwrap()) - .format("%+") - .to_string(), - }) - .render() - { - Ok(item) => { - let bytes = Bytes::from(item); + match render_sitemap_item(row) { + Ok(bytes) => { items += 1; streamed_bytes += bytes.len(); yield Ok(bytes); } Err(err) => { error!(?err, "error when rendering sitemap item xml"); - yield Err(AxumNope::InternalError(err.into())); + yield Err(err); break; } - }; + } } streamed_bytes += SITEMAP_FOOTER.len(); @@ -132,115 +141,273 @@ pub(crate) async fn sitemap_handler( if items > 50_000 || streamed_bytes > 50 * 1024 * 1024 { // alert when sitemap limits are reached // https://developers.google.com/search/docs/crawling-indexing/sitemaps/build-sitemap#general-guidelines - error!(items, streamed_bytes, letter, "sitemap limits exceeded") + error!(items, streamed_bytes, "sitemap limits exceeded"); } }) .instrument(stream_span); - Ok(( + ( StatusCode::OK, TypedHeader(ContentType::from(mimes::APPLICATION_XML.clone())), Body::from_stream(stream), - )) + ) +} + +pub(crate) async fn sitemap_handler( + Path(letter): Path, + conn: DbConnection, +) -> AxumResult { + if letter.len() != 1 { + return Err(AxumNope::ResourceNotFound); + } else if let Some(ch) = letter.chars().next() + && !(ch.is_ascii_lowercase()) + { + return Err(AxumNope::ResourceNotFound); + } + + let letter_pattern = format!("{letter}%"); + Ok(stream_sitemap(conn, move |conn| { + Box::pin( + sqlx::query_as!( + SitemapItem, + r#"SELECT crates.name, + releases.target_name, + release_build_status.last_build_time + FROM crates + INNER JOIN releases ON crates.latest_version_id = releases.id + INNER JOIN release_build_status ON release_build_status.rid = releases.id + WHERE + rustdoc_status = true AND + crates.name ILIKE $1 + "#, + letter_pattern, + ) + .fetch(&mut **conn), + ) + })) +} + +pub(crate) async fn recent_sitemap_handler( + Path(date): Path, + conn: DbConnection, +) -> AxumResult { + let next_day = date + .checked_add_days(Days::new(1)) + 
.ok_or(AxumNope::ResourceNotFound)?; + + let day_start = Utc.from_utc_datetime( + &date + .and_hms_opt(0, 0, 0) + .expect("00:00:00 is always a valid time"), + ); + let day_end = Utc.from_utc_datetime( + &next_day + .and_hms_opt(0, 0, 0) + .expect("00:00:00 is always a valid time"), + ); + + Ok(stream_sitemap(conn, move |conn| { + Box::pin( + sqlx::query_as!( + SitemapItem, + r#"SELECT crates.name, + releases.target_name, + release_build_status.last_build_time + FROM crates + INNER JOIN releases ON crates.latest_version_id = releases.id + INNER JOIN release_build_status ON release_build_status.rid = releases.id + WHERE + releases.rustdoc_status = true AND + release_build_status.last_build_time >= $1 AND + release_build_status.last_build_time < $2 + ORDER BY release_build_status.last_build_time DESC + "#, + day_start, + day_end, + ) + .fetch(&mut **conn), + ) + })) } #[cfg(test)] mod tests { use crate::testing::{ - AxumResponseTestExt, AxumRouterTestExt, TestEnvironmentExt as _, async_wrapper, + AxumResponseTestExt, AxumRouterTestExt, TestEnvironment, TestEnvironmentExt as _, }; + use anyhow::Result; use axum::http::StatusCode; - - #[test] - fn sitemap_index() { - async_wrapper(|env| async move { - let app = env.web_app().await; - app.assert_success("/sitemap.xml").await?; - Ok(()) - }) + use chrono::{TimeZone as _, Utc}; + use test_case::test_case; + + #[tokio::test(flavor = "multi_thread")] + async fn sitemap_index() -> Result<()> { + let env = TestEnvironment::new().await?; + let app = env.web_app().await; + let response = app.get("/sitemap.xml").await?; + assert!(response.status().is_success()); + + let content = response.text().await?; + let today = Utc::now().date_naive(); + let expected_recent = format!("https://docs.rs/-/sitemap/recent/{today}/sitemap.xml",); + assert!(content.contains(&expected_recent)); + Ok(()) } - #[test] - fn sitemap_invalid_letters() { - async_wrapper(|env| async move { - let web = env.web_app().await; - - // everything not length=1 
and ascii-lowercase should fail - for invalid_letter in &["1", "aa", "A", ""] { - println!("trying to fail letter {invalid_letter}"); - assert_eq!( - web.get(&format!("/-/sitemap/{invalid_letter}/sitemap.xml")) - .await? - .status(), - StatusCode::NOT_FOUND - ); - } - Ok(()) - }) + #[test_case("1")] + #[test_case("aa")] + #[test_case("A")] + #[test_case("")] + #[tokio::test(flavor = "multi_thread")] + async fn sitemap_invalid_letters(invalid_letter: &str) -> Result<()> { + // everything not length=1 and ascii-lowercase should fail + let env = TestEnvironment::new().await?; + let web = env.web_app().await; + + assert_eq!( + web.get(&format!("/-/sitemap/{invalid_letter}/sitemap.xml")) + .await? + .status(), + StatusCode::NOT_FOUND + ); + + Ok(()) } - #[test] - fn sitemap_letter() { - async_wrapper(|env| async move { - let web = env.web_app().await; + #[tokio::test(flavor = "multi_thread")] + async fn sitemap_letter() -> Result<()> { + let env = TestEnvironment::new().await?; + let web = env.web_app().await; - // letter-sitemaps always work, even without crates & releases - for letter in 'a'..='z' { - web.assert_success(&format!("/-/sitemap/{letter}/sitemap.xml")) - .await?; - } - - env.fake_release() - .await - .name("some_random_crate") - .create() - .await?; - env.fake_release() - .await - .name("some_random_crate_that_failed") - .build_result_failed() - .create() + // letter-sitemaps always work, even without crates & releases + for letter in 'a'..='z' { + web.assert_success(&format!("/-/sitemap/{letter}/sitemap.xml")) .await?; + } + + env.fake_release() + .await + .name("some_random_crate") + .create() + .await?; + env.fake_release() + .await + .name("some_random_crate_that_failed") + .build_result_failed() + .create() + .await?; + + // these fake crates appear only in the `s` sitemap + let response = web.get("/-/sitemap/s/sitemap.xml").await?; + assert!(response.status().is_success()); + + let content = response.text().await?; + 
assert!(content.contains("some_random_crate")); + assert!(!(content.contains("some_random_crate_that_failed"))); + + // and not in the others + for letter in ('a'..='z').filter(|&c| c != 's') { + let response = web.get(&format!("/-/sitemap/{letter}/sitemap.xml")).await?; - // these fake crates appear only in the `s` sitemap - let response = web.get("/-/sitemap/s/sitemap.xml").await?; assert!(response.status().is_success()); + assert!(!(response.text().await?.contains("some_random_crate"))); + } - let content = response.text().await?; - assert!(content.contains("some_random_crate")); - assert!(!(content.contains("some_random_crate_that_failed"))); + Ok(()) + } - // and not in the others - for letter in ('a'..='z').filter(|&c| c != 's') { - let response = web.get(&format!("/-/sitemap/{letter}/sitemap.xml")).await?; + #[tokio::test(flavor = "multi_thread")] + async fn sitemap_max_age() -> Result<()> { + let env = TestEnvironment::new().await?; + let web = env.web_app().await; + let db = env.pool()?; + + env.fake_release() + .await + .name("some_random_crate") + .release_time(Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap()) + .create() + .await?; + + sqlx::query!( + r#"UPDATE release_build_status + SET last_build_time = $1 + FROM releases + INNER JOIN crates ON crates.id = releases.crate_id + WHERE release_build_status.rid = releases.id + AND crates.name = $2"#, + Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap(), + "some_random_crate", + ) + .execute(&mut *db.get_async().await?) 
+ .await?; - assert!(response.status().is_success()); - assert!(!(response.text().await?.contains("some_random_crate"))); - } + let response = web.get("/-/sitemap/s/sitemap.xml").await?; + assert!(response.status().is_success()); - Ok(()) - }) + let content = response.text().await?; + assert!(content.contains("2022-08-28T00:00:00+00:00")); + Ok(()) } - #[test] - fn sitemap_max_age() { - async_wrapper(|env| async move { - let web = env.web_app().await; - - use chrono::{TimeZone, Utc}; - env.fake_release() - .await - .name("some_random_crate") - .release_time(Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap()) - .create() + #[tokio::test(flavor = "multi_thread")] + async fn sitemap_recent_dates() -> Result<()> { + let env = TestEnvironment::new().await?; + let web = env.web_app().await; + + let now = Utc::now(); + let today = now.date_naive().to_string(); + + env.fake_release() + .await + .name("recent_sitemap_crate") + .create() + .await?; + env.fake_release() + .await + .name("recent_sitemap_crate_failed") + .build_result_failed() + .create() + .await?; + + { + let response = web + .assert_success(&format!("/-/sitemap/recent/{today}/sitemap.xml")) .await?; - let response = web.get("/-/sitemap/s/sitemap.xml").await?; - assert!(response.status().is_success()); + let content = response.text().await?; + assert!(content.contains("recent_sitemap_crate")); + assert!(!content.contains("recent_sitemap_crate_failed")); + } + + { + let response = web + .assert_success("/-/sitemap/recent/1970-01-01/sitemap.xml") + .await?; let content = response.text().await?; - assert!(content.contains("2022-08-28T00:00:00+00:00")); - Ok(()) - }) + assert!(!content.contains("recent_sitemap_crate")); + assert!(!content.contains("recent_sitemap_crate_failed")); + } + + Ok(()) + } + + #[test_case("invalid-date")] + #[test_case("2024-13-40")] + #[tokio::test(flavor = "multi_thread")] + async fn sitemap_recent_invalid_dates(invalid_date: &str) -> Result<()> { + let env = 
TestEnvironment::new().await?; + + let web = env.web_app().await; + + assert_eq!( + web.get(&format!("/-/sitemap/recent/{invalid_date}/sitemap.xml")) + .await? + .status(), + StatusCode::BAD_REQUEST + ); + + Ok(()) } } diff --git a/crates/bin/docs_rs_web/src/routes.rs b/crates/bin/docs_rs_web/src/routes.rs index 8a01ec5c9..8fef55986 100644 --- a/crates/bin/docs_rs_web/src/routes.rs +++ b/crates/bin/docs_rs_web/src/routes.rs @@ -135,6 +135,10 @@ pub(crate) fn build_axum_routes() -> Result { get_static(|| async { cached_permanent_redirect("/-/static/opensearch.xml") }), ) .route_with_tsr("/sitemap.xml", get_internal(sitemap::sitemapindex_handler)) + .route_with_tsr( + "/-/sitemap/recent/{date}/sitemap.xml", + get_internal(sitemap::recent_sitemap_handler), + ) .route_with_tsr( "/-/sitemap/{letter}/sitemap.xml", get_internal(sitemap::sitemap_handler), diff --git a/crates/bin/docs_rs_web/templates/core/sitemap/index.xml b/crates/bin/docs_rs_web/templates/core/sitemap/index.xml index 6c5c88184..656b1a225 100644 --- a/crates/bin/docs_rs_web/templates/core/sitemap/index.xml +++ b/crates/bin/docs_rs_web/templates/core/sitemap/index.xml @@ -5,4 +5,9 @@ https://docs.rs/-/sitemap/{{ which }}/sitemap.xml {%- endfor %} + {% for which in recent_sitemaps -%} + + https://docs.rs/-/sitemap/recent/{{ which }}/sitemap.xml + + {%- endfor %} From 35853f6efec61da7e29517539ed0b3517e24d791 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Fri, 20 Mar 2026 17:57:18 +0100 Subject: [PATCH 2/4] remove unnecessary parentheses --- crates/bin/docs_rs_web/src/handlers/sitemap.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/bin/docs_rs_web/src/handlers/sitemap.rs b/crates/bin/docs_rs_web/src/handlers/sitemap.rs index 9de627642..855ab1526 100644 --- a/crates/bin/docs_rs_web/src/handlers/sitemap.rs +++ b/crates/bin/docs_rs_web/src/handlers/sitemap.rs @@ -160,7 +160,7 @@ pub(crate) async fn sitemap_handler( if letter.len() != 1 { return Err(AxumNope::ResourceNotFound); } 
else if let Some(ch) = letter.chars().next() - && !(ch.is_ascii_lowercase()) + && !ch.is_ascii_lowercase() { return Err(AxumNope::ResourceNotFound); } From 3079eb382068540a7c90e2125098dcb55e022158 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Tue, 24 Mar 2026 12:08:47 +0100 Subject: [PATCH 3/4] sitemap: simplify structs --- ...28515b4d1205877515d5552908e2b8153c6a1.json | 35 ++++++++ ...69cad6514ef1f56bb633deb7a027484c7d535.json | 34 -------- ...77e895614ab77d023a8bc2eb333af7f71ab2f.json | 35 -------- ...df78e7fe89c92494924cac3164f084d9573d4.json | 34 ++++++++ ...28515b4d1205877515d5552908e2b8153c6a1.json | 35 ++++++++ ...69cad6514ef1f56bb633deb7a027484c7d535.json | 34 -------- ...77e895614ab77d023a8bc2eb333af7f71ab2f.json | 35 -------- ...df78e7fe89c92494924cac3164f084d9573d4.json | 34 ++++++++ ...28515b4d1205877515d5552908e2b8153c6a1.json | 35 ++++++++ ...69cad6514ef1f56bb633deb7a027484c7d535.json | 34 -------- ...77e895614ab77d023a8bc2eb333af7f71ab2f.json | 35 -------- ...df78e7fe89c92494924cac3164f084d9573d4.json | 34 ++++++++ .../bin/docs_rs_web/src/handlers/sitemap.rs | 80 ++++++++----------- .../templates/core/sitemap/_item.xml | 1 + 14 files changed, 242 insertions(+), 253 deletions(-) create mode 100644 .sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json delete mode 100644 .sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json delete mode 100644 .sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json create mode 100644 .sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json create mode 100644 crates/bin/cratesfyi/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json delete mode 100644 crates/bin/cratesfyi/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json delete mode 100644 crates/bin/cratesfyi/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json create mode 
100644 crates/bin/cratesfyi/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json create mode 100644 crates/bin/docs_rs_web/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json delete mode 100644 crates/bin/docs_rs_web/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json delete mode 100644 crates/bin/docs_rs_web/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json create mode 100644 crates/bin/docs_rs_web/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json diff --git a/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json b/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json new file mode 100644 index 000000000..b6ff9d999 --- /dev/null +++ b/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json @@ -0,0 +1,35 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT crates.name as \"crate_name\",\n -- when we have rustdoc_status=true, both these fields are always filled,\n -- so forcing them as non-option is ok.\n releases.target_name as \"target_name!\",\n release_build_status.last_build_time as \"last_build_time!\"\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n releases.rustdoc_status = true AND\n release_build_status.last_build_time >= $1 AND\n release_build_status.last_build_time < $2\n ORDER BY release_build_status.last_build_time DESC\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "crate_name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "target_name!", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "last_build_time!", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Timestamptz", + "Timestamptz" + ] + }, + "nullable": [ + false, + true, + true + ] + }, + 
"hash": "1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1" +} diff --git a/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json b/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json deleted file mode 100644 index cc59734f7..000000000 --- a/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT crates.name,\n releases.target_name,\n release_build_status.last_build_time\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n rustdoc_status = true AND\n crates.name ILIKE $1\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "name", - "type_info": "Text" - }, - { - "ordinal": 1, - "name": "target_name", - "type_info": "Varchar" - }, - { - "ordinal": 2, - "name": "last_build_time", - "type_info": "Timestamptz" - } - ], - "parameters": { - "Left": [ - "Text" - ] - }, - "nullable": [ - false, - true, - true - ] - }, - "hash": "33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535" -} diff --git a/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json b/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json deleted file mode 100644 index 6c012fc91..000000000 --- a/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT crates.name,\n releases.target_name,\n release_build_status.last_build_time\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n releases.rustdoc_status = true AND\n release_build_status.last_build_time >= $1 AND\n release_build_status.last_build_time < $2\n ORDER BY 
release_build_status.last_build_time DESC\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "name", - "type_info": "Text" - }, - { - "ordinal": 1, - "name": "target_name", - "type_info": "Varchar" - }, - { - "ordinal": 2, - "name": "last_build_time", - "type_info": "Timestamptz" - } - ], - "parameters": { - "Left": [ - "Timestamptz", - "Timestamptz" - ] - }, - "nullable": [ - false, - true, - true - ] - }, - "hash": "671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f" -} diff --git a/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json b/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json new file mode 100644 index 000000000..cac81bc0a --- /dev/null +++ b/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json @@ -0,0 +1,34 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT crates.name as \"crate_name\",\n -- when we have rustdoc_status=true, both these fields are always filled,\n -- so forcing them as non-option is ok.\n releases.target_name as \"target_name!\",\n release_build_status.last_build_time as \"last_build_time!\"\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n rustdoc_status = true AND\n crates.name ILIKE $1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "crate_name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "target_name!", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "last_build_time!", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [ + false, + true, + true + ] + }, + "hash": "985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4" +} diff --git a/crates/bin/cratesfyi/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json 
b/crates/bin/cratesfyi/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json new file mode 100644 index 000000000..b6ff9d999 --- /dev/null +++ b/crates/bin/cratesfyi/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json @@ -0,0 +1,35 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT crates.name as \"crate_name\",\n -- when we have rustdoc_status=true, both these fields are always filled,\n -- so forcing them as non-option is ok.\n releases.target_name as \"target_name!\",\n release_build_status.last_build_time as \"last_build_time!\"\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n releases.rustdoc_status = true AND\n release_build_status.last_build_time >= $1 AND\n release_build_status.last_build_time < $2\n ORDER BY release_build_status.last_build_time DESC\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "crate_name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "target_name!", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "last_build_time!", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Timestamptz", + "Timestamptz" + ] + }, + "nullable": [ + false, + true, + true + ] + }, + "hash": "1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1" +} diff --git a/crates/bin/cratesfyi/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json b/crates/bin/cratesfyi/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json deleted file mode 100644 index cc59734f7..000000000 --- a/crates/bin/cratesfyi/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT crates.name,\n releases.target_name,\n release_build_status.last_build_time\n FROM crates\n INNER JOIN releases ON 
crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n rustdoc_status = true AND\n crates.name ILIKE $1\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "name", - "type_info": "Text" - }, - { - "ordinal": 1, - "name": "target_name", - "type_info": "Varchar" - }, - { - "ordinal": 2, - "name": "last_build_time", - "type_info": "Timestamptz" - } - ], - "parameters": { - "Left": [ - "Text" - ] - }, - "nullable": [ - false, - true, - true - ] - }, - "hash": "33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535" -} diff --git a/crates/bin/cratesfyi/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json b/crates/bin/cratesfyi/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json deleted file mode 100644 index 6c012fc91..000000000 --- a/crates/bin/cratesfyi/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT crates.name,\n releases.target_name,\n release_build_status.last_build_time\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n releases.rustdoc_status = true AND\n release_build_status.last_build_time >= $1 AND\n release_build_status.last_build_time < $2\n ORDER BY release_build_status.last_build_time DESC\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "name", - "type_info": "Text" - }, - { - "ordinal": 1, - "name": "target_name", - "type_info": "Varchar" - }, - { - "ordinal": 2, - "name": "last_build_time", - "type_info": "Timestamptz" - } - ], - "parameters": { - "Left": [ - "Timestamptz", - "Timestamptz" - ] - }, - "nullable": [ - false, - true, - true - ] - }, - "hash": "671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f" -} diff --git 
a/crates/bin/cratesfyi/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json b/crates/bin/cratesfyi/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json new file mode 100644 index 000000000..cac81bc0a --- /dev/null +++ b/crates/bin/cratesfyi/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json @@ -0,0 +1,34 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT crates.name as \"crate_name\",\n -- when we have rustdoc_status=true, both these fields are always filled,\n -- so forcing them as non-option is ok.\n releases.target_name as \"target_name!\",\n release_build_status.last_build_time as \"last_build_time!\"\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n rustdoc_status = true AND\n crates.name ILIKE $1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "crate_name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "target_name!", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "last_build_time!", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [ + false, + true, + true + ] + }, + "hash": "985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4" +} diff --git a/crates/bin/docs_rs_web/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json b/crates/bin/docs_rs_web/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json new file mode 100644 index 000000000..b6ff9d999 --- /dev/null +++ b/crates/bin/docs_rs_web/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json @@ -0,0 +1,35 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT crates.name as \"crate_name\",\n -- when we have rustdoc_status=true, both these fields are always filled,\n -- so forcing them as non-option is ok.\n releases.target_name 
as \"target_name!\",\n release_build_status.last_build_time as \"last_build_time!\"\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n releases.rustdoc_status = true AND\n release_build_status.last_build_time >= $1 AND\n release_build_status.last_build_time < $2\n ORDER BY release_build_status.last_build_time DESC\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "crate_name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "target_name!", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "last_build_time!", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Timestamptz", + "Timestamptz" + ] + }, + "nullable": [ + false, + true, + true + ] + }, + "hash": "1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1" +} diff --git a/crates/bin/docs_rs_web/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json b/crates/bin/docs_rs_web/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json deleted file mode 100644 index cc59734f7..000000000 --- a/crates/bin/docs_rs_web/.sqlx/query-33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT crates.name,\n releases.target_name,\n release_build_status.last_build_time\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n rustdoc_status = true AND\n crates.name ILIKE $1\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "name", - "type_info": "Text" - }, - { - "ordinal": 1, - "name": "target_name", - "type_info": "Varchar" - }, - { - "ordinal": 2, - "name": "last_build_time", - "type_info": "Timestamptz" - } - ], - "parameters": { - "Left": [ - "Text" - ] - }, - "nullable": [ - false, - 
true, - true - ] - }, - "hash": "33c4c717bad9ab974f95819d84b69cad6514ef1f56bb633deb7a027484c7d535" -} diff --git a/crates/bin/docs_rs_web/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json b/crates/bin/docs_rs_web/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json deleted file mode 100644 index 6c012fc91..000000000 --- a/crates/bin/docs_rs_web/.sqlx/query-671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT crates.name,\n releases.target_name,\n release_build_status.last_build_time\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n releases.rustdoc_status = true AND\n release_build_status.last_build_time >= $1 AND\n release_build_status.last_build_time < $2\n ORDER BY release_build_status.last_build_time DESC\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "name", - "type_info": "Text" - }, - { - "ordinal": 1, - "name": "target_name", - "type_info": "Varchar" - }, - { - "ordinal": 2, - "name": "last_build_time", - "type_info": "Timestamptz" - } - ], - "parameters": { - "Left": [ - "Timestamptz", - "Timestamptz" - ] - }, - "nullable": [ - false, - true, - true - ] - }, - "hash": "671688ccc10465f1c109c08152777e895614ab77d023a8bc2eb333af7f71ab2f" -} diff --git a/crates/bin/docs_rs_web/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json b/crates/bin/docs_rs_web/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json new file mode 100644 index 000000000..cac81bc0a --- /dev/null +++ b/crates/bin/docs_rs_web/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json @@ -0,0 +1,34 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT crates.name as \"crate_name\",\n -- when we have rustdoc_status=true, both these 
fields are always filled,\n -- so forcing them as non-option is ok.\n releases.target_name as \"target_name!\",\n release_build_status.last_build_time as \"last_build_time!\"\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n rustdoc_status = true AND\n crates.name ILIKE $1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "crate_name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "target_name!", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "last_build_time!", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [ + false, + true, + true + ] + }, + "hash": "985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4" +} diff --git a/crates/bin/docs_rs_web/src/handlers/sitemap.rs b/crates/bin/docs_rs_web/src/handlers/sitemap.rs index 855ab1526..6aa3c0ba8 100644 --- a/crates/bin/docs_rs_web/src/handlers/sitemap.rs +++ b/crates/bin/docs_rs_web/src/handlers/sitemap.rs @@ -24,13 +24,13 @@ const RECENT_SITEMAP_DAYS: u64 = 7; #[derive(Template)] #[template(path = "core/sitemap/index.xml")] #[derive(Debug, Clone, PartialEq, Eq)] -struct SitemapIndexXml { +struct SitemapIndex { sitemaps: Vec, recent_sitemaps: Vec, } impl_axum_webpage! 
{ - SitemapIndexXml, + SitemapIndex, content_type = "application/xml", } @@ -47,7 +47,7 @@ pub(crate) async fn sitemapindex_handler() -> impl IntoResponse { }) .collect(); - SitemapIndexXml { + SitemapIndex { sitemaps, recent_sitemaps, } @@ -56,45 +56,27 @@ pub(crate) async fn sitemapindex_handler() -> impl IntoResponse { #[derive(Template)] #[template(path = "core/sitemap/_item.xml")] #[derive(Debug, Clone, PartialEq, Eq)] -struct SitemapItemXml { +struct SitemapItem { crate_name: String, - last_modified: String, target_name: String, + last_build_time: chrono::DateTime, } -const SITEMAP_HEADER: &[u8] = br#" -\n"#; - -const SITEMAP_FOOTER: &[u8] = b"\n"; - -fn render_sitemap_item(item: SitemapItem) -> AxumResult { - let target_name = item - .target_name - .expect("when we have rustdoc_status=true, this field is filled"); - - let item = (SitemapItemXml { - crate_name: item.name, - target_name, - last_modified: item - .last_build_time - .expect("when we have rustdoc_status=true, this field is filled") +impl SitemapItem { + fn last_modified(&self) -> String { + self.last_build_time // On Aug 27 2022 we added `` to all pages, // so they should all get recrawled if they haven't been since then. 
.max(Utc.with_ymd_and_hms(2022, 8, 28, 0, 0, 0).unwrap()) .format("%+") - .to_string(), - }) - .render() - .map_err(|err| AxumNope::InternalError(err.into()))?; - - Ok(Bytes::from(item)) + .to_string() + } } -struct SitemapItem { - name: String, - target_name: Option, - last_build_time: Option>, -} +const SITEMAP_HEADER: &[u8] = br#" +\n"#; + +const SITEMAP_FOOTER: &[u8] = b"\n"; type SitemapQueryStream<'a> = BoxStream<'a, Result>; @@ -111,9 +93,9 @@ where let result = query(&mut conn); pin_mut!(result); - while let Some(row) = result.next().await { - let row = match row { - Ok(row) => row, + while let Some(item) = result.next().await { + let item = match item { + Ok(item) => item, Err(err) => { error!(?err, "error fetching row from database"); yield Err(AxumNope::InternalError(err.into())); @@ -121,15 +103,17 @@ where } }; - match render_sitemap_item(row) { - Ok(bytes) => { + let mut buf = Vec::with_capacity(400); + + match item.write_into(&mut buf) { + Ok(_) => { items += 1; - streamed_bytes += bytes.len(); - yield Ok(bytes); + streamed_bytes += buf.len(); + yield Ok(Bytes::from(buf)); } Err(err) => { error!(?err, "error when rendering sitemap item xml"); - yield Err(err); + yield Err(AxumNope::InternalError(err.into())); break; } } @@ -170,9 +154,11 @@ pub(crate) async fn sitemap_handler( Box::pin( sqlx::query_as!( SitemapItem, - r#"SELECT crates.name, - releases.target_name, - release_build_status.last_build_time + r#"SELECT crates.name as "crate_name", + -- when we have rustdoc_status=true, both these fields are always filled, + -- so forcing them as non-option is ok. + releases.target_name as "target_name!", + release_build_status.last_build_time as "last_build_time!" 
FROM crates INNER JOIN releases ON crates.latest_version_id = releases.id INNER JOIN release_build_status ON release_build_status.rid = releases.id @@ -210,9 +196,11 @@ pub(crate) async fn recent_sitemap_handler( Box::pin( sqlx::query_as!( SitemapItem, - r#"SELECT crates.name, - releases.target_name, - release_build_status.last_build_time + r#"SELECT crates.name as "crate_name", + -- when we have rustdoc_status=true, both these fields are always filled, + -- so forcing them as non-option is ok. + releases.target_name as "target_name!", + release_build_status.last_build_time as "last_build_time!" FROM crates INNER JOIN releases ON crates.latest_version_id = releases.id INNER JOIN release_build_status ON release_build_status.rid = releases.id diff --git a/crates/bin/docs_rs_web/templates/core/sitemap/_item.xml b/crates/bin/docs_rs_web/templates/core/sitemap/_item.xml index d3e56e9a3..f96fa9ecc 100644 --- a/crates/bin/docs_rs_web/templates/core/sitemap/_item.xml +++ b/crates/bin/docs_rs_web/templates/core/sitemap/_item.xml @@ -1,3 +1,4 @@ + {% let last_modified = last_modified() -%} https://docs.rs/{{ crate_name }}/latest/{{ target_name }}/ {{ last_modified|escape_xml }} From a0747e9a9454e5bf847f63bd45c009e950db597b Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Tue, 24 Mar 2026 12:17:34 +0100 Subject: [PATCH 4/4] sitemap: don't use checked_ variant when not necessary --- crates/bin/docs_rs_web/src/handlers/sitemap.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/crates/bin/docs_rs_web/src/handlers/sitemap.rs b/crates/bin/docs_rs_web/src/handlers/sitemap.rs index 6aa3c0ba8..567259550 100644 --- a/crates/bin/docs_rs_web/src/handlers/sitemap.rs +++ b/crates/bin/docs_rs_web/src/handlers/sitemap.rs @@ -38,13 +38,7 @@ pub(crate) async fn sitemapindex_handler() -> impl IntoResponse { let sitemaps: Vec = ('a'..='z').collect(); let today = Utc::now().date_naive(); let recent_sitemaps = (0..RECENT_SITEMAP_DAYS) - .map(|days| { - today - 
.checked_sub_days(Days::new(days)) - .expect("underflow when building recent sitemap dates") - .format("%F") - .to_string() - }) + .map(|days| (today - Days::new(days)).format("%F").to_string()) .collect(); SitemapIndex {