diff --git a/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json b/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json new file mode 100644 index 000000000..b6ff9d999 --- /dev/null +++ b/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json @@ -0,0 +1,35 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT crates.name as \"crate_name\",\n -- when we have rustdoc_status=true, both these fields are always filled,\n -- so forcing them as non-option is ok.\n releases.target_name as \"target_name!\",\n release_build_status.last_build_time as \"last_build_time!\"\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n releases.rustdoc_status = true AND\n release_build_status.last_build_time >= $1 AND\n release_build_status.last_build_time < $2\n ORDER BY release_build_status.last_build_time DESC\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "crate_name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "target_name!", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "last_build_time!", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Timestamptz", + "Timestamptz" + ] + }, + "nullable": [ + false, + true, + true + ] + }, + "hash": "1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1" +} diff --git a/.sqlx/query-4a7812c6d849bbe2df34107d81b6f2a16cc17c471eb199d557ab6b32656ccef8.json b/.sqlx/query-4a7812c6d849bbe2df34107d81b6f2a16cc17c471eb199d557ab6b32656ccef8.json new file mode 100644 index 000000000..960489039 --- /dev/null +++ b/.sqlx/query-4a7812c6d849bbe2df34107d81b6f2a16cc17c471eb199d557ab6b32656ccef8.json @@ -0,0 +1,15 @@ +{ + "db_name": "PostgreSQL", + "query": "UPDATE release_build_status\n SET last_build_time = $1\n FROM releases\n INNER JOIN crates ON crates.id = releases.crate_id\n WHERE 
release_build_status.rid = releases.id\n AND crates.name = $2", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Timestamptz", + "Text" + ] + }, + "nullable": [] + }, + "hash": "4a7812c6d849bbe2df34107d81b6f2a16cc17c471eb199d557ab6b32656ccef8" +} diff --git a/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json b/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json new file mode 100644 index 000000000..cac81bc0a --- /dev/null +++ b/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json @@ -0,0 +1,34 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT crates.name as \"crate_name\",\n -- when we have rustdoc_status=true, both these fields are always filled,\n -- so forcing them as non-option is ok.\n releases.target_name as \"target_name!\",\n release_build_status.last_build_time as \"last_build_time!\"\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n rustdoc_status = true AND\n crates.name ILIKE $1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "crate_name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "target_name!", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "last_build_time!", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [ + false, + true, + true + ] + }, + "hash": "985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4" +} diff --git a/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json b/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json deleted file mode 100644 index ce7e21cd4..000000000 --- a/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT crates.name,\n 
releases.target_name,\n MAX(releases.release_time) as \"release_time!\"\n FROM crates\n INNER JOIN releases ON releases.crate_id = crates.id\n WHERE\n rustdoc_status = true AND\n crates.name ILIKE $1\n GROUP BY crates.name, releases.target_name\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "name", - "type_info": "Text" - }, - { - "ordinal": 1, - "name": "target_name", - "type_info": "Varchar" - }, - { - "ordinal": 2, - "name": "release_time!", - "type_info": "Timestamptz" - } - ], - "parameters": { - "Left": [ - "Text" - ] - }, - "nullable": [ - false, - true, - null - ] - }, - "hash": "df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c" -} diff --git a/crates/bin/cratesfyi/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json b/crates/bin/cratesfyi/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json new file mode 100644 index 000000000..b6ff9d999 --- /dev/null +++ b/crates/bin/cratesfyi/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json @@ -0,0 +1,35 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT crates.name as \"crate_name\",\n -- when we have rustdoc_status=true, both these fields are always filled,\n -- so forcing them as non-option is ok.\n releases.target_name as \"target_name!\",\n release_build_status.last_build_time as \"last_build_time!\"\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n releases.rustdoc_status = true AND\n release_build_status.last_build_time >= $1 AND\n release_build_status.last_build_time < $2\n ORDER BY release_build_status.last_build_time DESC\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "crate_name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "target_name!", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "last_build_time!", + "type_info": 
"Timestamptz" + } + ], + "parameters": { + "Left": [ + "Timestamptz", + "Timestamptz" + ] + }, + "nullable": [ + false, + true, + true + ] + }, + "hash": "1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1" +} diff --git a/crates/bin/cratesfyi/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json b/crates/bin/cratesfyi/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json new file mode 100644 index 000000000..cac81bc0a --- /dev/null +++ b/crates/bin/cratesfyi/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json @@ -0,0 +1,34 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT crates.name as \"crate_name\",\n -- when we have rustdoc_status=true, both these fields are always filled,\n -- so forcing them as non-option is ok.\n releases.target_name as \"target_name!\",\n release_build_status.last_build_time as \"last_build_time!\"\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n rustdoc_status = true AND\n crates.name ILIKE $1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "crate_name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "target_name!", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "last_build_time!", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [ + false, + true, + true + ] + }, + "hash": "985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4" +} diff --git a/crates/bin/cratesfyi/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json b/crates/bin/cratesfyi/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json deleted file mode 100644 index ce7e21cd4..000000000 --- a/crates/bin/cratesfyi/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json +++ /dev/null @@ 
-1,34 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT crates.name,\n releases.target_name,\n MAX(releases.release_time) as \"release_time!\"\n FROM crates\n INNER JOIN releases ON releases.crate_id = crates.id\n WHERE\n rustdoc_status = true AND\n crates.name ILIKE $1\n GROUP BY crates.name, releases.target_name\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "name", - "type_info": "Text" - }, - { - "ordinal": 1, - "name": "target_name", - "type_info": "Varchar" - }, - { - "ordinal": 2, - "name": "release_time!", - "type_info": "Timestamptz" - } - ], - "parameters": { - "Left": [ - "Text" - ] - }, - "nullable": [ - false, - true, - null - ] - }, - "hash": "df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c" -} diff --git a/crates/bin/docs_rs_web/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json b/crates/bin/docs_rs_web/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json new file mode 100644 index 000000000..b6ff9d999 --- /dev/null +++ b/crates/bin/docs_rs_web/.sqlx/query-1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1.json @@ -0,0 +1,35 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT crates.name as \"crate_name\",\n -- when we have rustdoc_status=true, both these fields are always filled,\n -- so forcing them as non-option is ok.\n releases.target_name as \"target_name!\",\n release_build_status.last_build_time as \"last_build_time!\"\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON release_build_status.rid = releases.id\n WHERE\n releases.rustdoc_status = true AND\n release_build_status.last_build_time >= $1 AND\n release_build_status.last_build_time < $2\n ORDER BY release_build_status.last_build_time DESC\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "crate_name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "target_name!", + "type_info": 
"Varchar" + }, + { + "ordinal": 2, + "name": "last_build_time!", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Timestamptz", + "Timestamptz" + ] + }, + "nullable": [ + false, + true, + true + ] + }, + "hash": "1f073d9d6bd0aff5c885217ca5b28515b4d1205877515d5552908e2b8153c6a1" +} diff --git a/crates/bin/docs_rs_web/.sqlx/query-4a7812c6d849bbe2df34107d81b6f2a16cc17c471eb199d557ab6b32656ccef8.json b/crates/bin/docs_rs_web/.sqlx/query-4a7812c6d849bbe2df34107d81b6f2a16cc17c471eb199d557ab6b32656ccef8.json new file mode 100644 index 000000000..960489039 --- /dev/null +++ b/crates/bin/docs_rs_web/.sqlx/query-4a7812c6d849bbe2df34107d81b6f2a16cc17c471eb199d557ab6b32656ccef8.json @@ -0,0 +1,15 @@ +{ + "db_name": "PostgreSQL", + "query": "UPDATE release_build_status\n SET last_build_time = $1\n FROM releases\n INNER JOIN crates ON crates.id = releases.crate_id\n WHERE release_build_status.rid = releases.id\n AND crates.name = $2", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Timestamptz", + "Text" + ] + }, + "nullable": [] + }, + "hash": "4a7812c6d849bbe2df34107d81b6f2a16cc17c471eb199d557ab6b32656ccef8" +} diff --git a/crates/bin/docs_rs_web/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json b/crates/bin/docs_rs_web/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json new file mode 100644 index 000000000..cac81bc0a --- /dev/null +++ b/crates/bin/docs_rs_web/.sqlx/query-985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4.json @@ -0,0 +1,34 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT crates.name as \"crate_name\",\n -- when we have rustdoc_status=true, both these fields are always filled,\n -- so forcing them as non-option is ok.\n releases.target_name as \"target_name!\",\n release_build_status.last_build_time as \"last_build_time!\"\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON 
release_build_status.rid = releases.id\n WHERE\n rustdoc_status = true AND\n crates.name ILIKE $1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "crate_name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "target_name!", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "last_build_time!", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [ + false, + true, + true + ] + }, + "hash": "985245a6f5c2b4fd17b5462122cdf78e7fe89c92494924cac3164f084d9573d4" +} diff --git a/crates/bin/docs_rs_web/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json b/crates/bin/docs_rs_web/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json deleted file mode 100644 index ce7e21cd4..000000000 --- a/crates/bin/docs_rs_web/.sqlx/query-df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT crates.name,\n releases.target_name,\n MAX(releases.release_time) as \"release_time!\"\n FROM crates\n INNER JOIN releases ON releases.crate_id = crates.id\n WHERE\n rustdoc_status = true AND\n crates.name ILIKE $1\n GROUP BY crates.name, releases.target_name\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "name", - "type_info": "Text" - }, - { - "ordinal": 1, - "name": "target_name", - "type_info": "Varchar" - }, - { - "ordinal": 2, - "name": "release_time!", - "type_info": "Timestamptz" - } - ], - "parameters": { - "Left": [ - "Text" - ] - }, - "nullable": [ - false, - true, - null - ] - }, - "hash": "df1c002b7c4f5e2567eeefff56ee51003d90122f83429313966add5b224f5f6c" -} diff --git a/crates/bin/docs_rs_web/src/handlers/sitemap.rs b/crates/bin/docs_rs_web/src/handlers/sitemap.rs index b77fa3725..567259550 100644 --- a/crates/bin/docs_rs_web/src/handlers/sitemap.rs +++ b/crates/bin/docs_rs_web/src/handlers/sitemap.rs @@ -12,38 +12,59 @@ use axum::{ 
response::IntoResponse, }; use axum_extra::{TypedHeader, headers::ContentType}; -use chrono::{TimeZone, Utc}; +use chrono::{Days, NaiveDate, TimeZone, Utc}; use docs_rs_mimes as mimes; -use futures_util::{StreamExt as _, pin_mut}; +use futures_util::{StreamExt as _, pin_mut, stream::BoxStream}; use tracing::{Span, error}; use tracing_futures::Instrument as _; +const RECENT_SITEMAP_DAYS: u64 = 7; + /// sitemap index #[derive(Template)] #[template(path = "core/sitemap/index.xml")] #[derive(Debug, Clone, PartialEq, Eq)] -struct SitemapIndexXml { +struct SitemapIndex { sitemaps: Vec, + recent_sitemaps: Vec, } impl_axum_webpage! { - SitemapIndexXml, + SitemapIndex, content_type = "application/xml", } pub(crate) async fn sitemapindex_handler() -> impl IntoResponse { let sitemaps: Vec = ('a'..='z').collect(); - - SitemapIndexXml { sitemaps } + let today = Utc::now().date_naive(); + let recent_sitemaps = (0..RECENT_SITEMAP_DAYS) + .map(|days| (today - Days::new(days)).format("%F").to_string()) + .collect(); + + SitemapIndex { + sitemaps, + recent_sitemaps, + } } #[derive(Template)] #[template(path = "core/sitemap/_item.xml")] #[derive(Debug, Clone, PartialEq, Eq)] -struct SitemapItemXml { +struct SitemapItem { crate_name: String, - last_modified: String, target_name: String, + last_build_time: chrono::DateTime, +} + +impl SitemapItem { + fn last_modified(&self) -> String { + self.last_build_time + // On Aug 27 2022 we added `` to all pages, + // so they should all get recrawled if they haven't been since then. 
+ .max(Utc.with_ymd_and_hms(2022, 8, 28, 0, 0, 0).unwrap()) + .format("%+") + .to_string() + } } const SITEMAP_HEADER: &[u8] = br#" @@ -51,45 +72,24 @@ const SITEMAP_HEADER: &[u8] = br#" const SITEMAP_FOOTER: &[u8] = b"\n"; -pub(crate) async fn sitemap_handler( - Path(letter): Path, - mut conn: DbConnection, -) -> AxumResult { - if letter.len() != 1 { - return Err(AxumNope::ResourceNotFound); - } else if let Some(ch) = letter.chars().next() - && !(ch.is_ascii_lowercase()) - { - return Err(AxumNope::ResourceNotFound); - } +type SitemapQueryStream<'a> = BoxStream<'a, Result>; +fn stream_sitemap(mut conn: DbConnection, query: Query) -> impl IntoResponse +where + Query: for<'a> FnOnce(&'a mut DbConnection) -> SitemapQueryStream<'a> + Send + 'static, +{ let stream_span = Span::current(); - let stream = stream!({ let mut items: usize = 0; let mut streamed_bytes: usize = SITEMAP_HEADER.len(); yield Ok(Bytes::from_static(SITEMAP_HEADER)); - let result = sqlx::query!( - r#"SELECT crates.name, - releases.target_name, - MAX(releases.release_time) as "release_time!" 
- FROM crates - INNER JOIN releases ON releases.crate_id = crates.id - WHERE - rustdoc_status = true AND - crates.name ILIKE $1 - GROUP BY crates.name, releases.target_name - "#, - format!("{letter}%"), - ) - .fetch(&mut *conn); - + let result = query(&mut conn); pin_mut!(result); - while let Some(row) = result.next().await { - let row = match row { - Ok(row) => row, + while let Some(item) = result.next().await { + let item = match item { + Ok(item) => item, Err(err) => { error!(?err, "error fetching row from database"); yield Err(AxumNope::InternalError(err.into())); @@ -97,33 +97,20 @@ pub(crate) async fn sitemap_handler( } }; - match (SitemapItemXml { - crate_name: row.name, - target_name: row - .target_name - .expect("when we have rustdoc_status=true, this field is filled"), - last_modified: row - .release_time - // On Aug 27 2022 we added `` to all pages, - // so they should all get recrawled if they haven't been since then. - .max(Utc.with_ymd_and_hms(2022, 8, 28, 0, 0, 0).unwrap()) - .format("%+") - .to_string(), - }) - .render() - { - Ok(item) => { - let bytes = Bytes::from(item); + let mut buf = Vec::with_capacity(400); + + match item.write_into(&mut buf) { + Ok(_) => { items += 1; - streamed_bytes += bytes.len(); - yield Ok(bytes); + streamed_bytes += buf.len(); + yield Ok(Bytes::from(buf)); } Err(err) => { error!(?err, "error when rendering sitemap item xml"); yield Err(AxumNope::InternalError(err.into())); break; } - }; + } } streamed_bytes += SITEMAP_FOOTER.len(); @@ -132,115 +119,277 @@ pub(crate) async fn sitemap_handler( if items > 50_000 || streamed_bytes > 50 * 1024 * 1024 { // alert when sitemap limits are reached // https://developers.google.com/search/docs/crawling-indexing/sitemaps/build-sitemap#general-guidelines - error!(items, streamed_bytes, letter, "sitemap limits exceeded") + error!(items, streamed_bytes, "sitemap limits exceeded"); } }) .instrument(stream_span); - Ok(( + ( StatusCode::OK, 
TypedHeader(ContentType::from(mimes::APPLICATION_XML.clone())), Body::from_stream(stream), - )) + ) +} + +pub(crate) async fn sitemap_handler( + Path(letter): Path, + conn: DbConnection, +) -> AxumResult { + if letter.len() != 1 { + return Err(AxumNope::ResourceNotFound); + } else if let Some(ch) = letter.chars().next() + && !ch.is_ascii_lowercase() + { + return Err(AxumNope::ResourceNotFound); + } + + let letter_pattern = format!("{letter}%"); + Ok(stream_sitemap(conn, move |conn| { + Box::pin( + sqlx::query_as!( + SitemapItem, + r#"SELECT crates.name as "crate_name", + -- when we have rustdoc_status=true, both these fields are always filled, + -- so forcing them as non-option is ok. + releases.target_name as "target_name!", + release_build_status.last_build_time as "last_build_time!" + FROM crates + INNER JOIN releases ON crates.latest_version_id = releases.id + INNER JOIN release_build_status ON release_build_status.rid = releases.id + WHERE + rustdoc_status = true AND + crates.name ILIKE $1 + "#, + letter_pattern, + ) + .fetch(&mut **conn), + ) + })) +} + +pub(crate) async fn recent_sitemap_handler( + Path(date): Path, + conn: DbConnection, +) -> AxumResult { + let next_day = date + .checked_add_days(Days::new(1)) + .ok_or(AxumNope::ResourceNotFound)?; + + let day_start = Utc.from_utc_datetime( + &date + .and_hms_opt(0, 0, 0) + .expect("00:00:00 is always a valid time"), + ); + let day_end = Utc.from_utc_datetime( + &next_day + .and_hms_opt(0, 0, 0) + .expect("00:00:00 is always a valid time"), + ); + + Ok(stream_sitemap(conn, move |conn| { + Box::pin( + sqlx::query_as!( + SitemapItem, + r#"SELECT crates.name as "crate_name", + -- when we have rustdoc_status=true, both these fields are always filled, + -- so forcing them as non-option is ok. + releases.target_name as "target_name!", + release_build_status.last_build_time as "last_build_time!" 
+ FROM crates + INNER JOIN releases ON crates.latest_version_id = releases.id + INNER JOIN release_build_status ON release_build_status.rid = releases.id + WHERE + releases.rustdoc_status = true AND + release_build_status.last_build_time >= $1 AND + release_build_status.last_build_time < $2 + ORDER BY release_build_status.last_build_time DESC + "#, + day_start, + day_end, + ) + .fetch(&mut **conn), + ) + })) } #[cfg(test)] mod tests { use crate::testing::{ - AxumResponseTestExt, AxumRouterTestExt, TestEnvironmentExt as _, async_wrapper, + AxumResponseTestExt, AxumRouterTestExt, TestEnvironment, TestEnvironmentExt as _, }; + use anyhow::Result; use axum::http::StatusCode; - - #[test] - fn sitemap_index() { - async_wrapper(|env| async move { - let app = env.web_app().await; - app.assert_success("/sitemap.xml").await?; - Ok(()) - }) + use chrono::{TimeZone as _, Utc}; + use test_case::test_case; + + #[tokio::test(flavor = "multi_thread")] + async fn sitemap_index() -> Result<()> { + let env = TestEnvironment::new().await?; + let app = env.web_app().await; + let response = app.get("/sitemap.xml").await?; + assert!(response.status().is_success()); + + let content = response.text().await?; + let today = Utc::now().date_naive(); + let expected_recent = format!("https://docs.rs/-/sitemap/recent/{today}/sitemap.xml",); + assert!(content.contains(&expected_recent)); + Ok(()) } - #[test] - fn sitemap_invalid_letters() { - async_wrapper(|env| async move { - let web = env.web_app().await; - - // everything not length=1 and ascii-lowercase should fail - for invalid_letter in &["1", "aa", "A", ""] { - println!("trying to fail letter {invalid_letter}"); - assert_eq!( - web.get(&format!("/-/sitemap/{invalid_letter}/sitemap.xml")) - .await? 
- .status(), - StatusCode::NOT_FOUND - ); - } - Ok(()) - }) + #[test_case("1")] + #[test_case("aa")] + #[test_case("A")] + #[test_case("")] + #[tokio::test(flavor = "multi_thread")] + async fn sitemap_invalid_letters(invalid_letter: &str) -> Result<()> { + // everything not length=1 and ascii-lowercase should fail + let env = TestEnvironment::new().await?; + let web = env.web_app().await; + + assert_eq!( + web.get(&format!("/-/sitemap/{invalid_letter}/sitemap.xml")) + .await? + .status(), + StatusCode::NOT_FOUND + ); + + Ok(()) } - #[test] - fn sitemap_letter() { - async_wrapper(|env| async move { - let web = env.web_app().await; - - // letter-sitemaps always work, even without crates & releases - for letter in 'a'..='z' { - web.assert_success(&format!("/-/sitemap/{letter}/sitemap.xml")) - .await?; - } + #[tokio::test(flavor = "multi_thread")] + async fn sitemap_letter() -> Result<()> { + let env = TestEnvironment::new().await?; + let web = env.web_app().await; - env.fake_release() - .await - .name("some_random_crate") - .create() - .await?; - env.fake_release() - .await - .name("some_random_crate_that_failed") - .build_result_failed() - .create() + // letter-sitemaps always work, even without crates & releases + for letter in 'a'..='z' { + web.assert_success(&format!("/-/sitemap/{letter}/sitemap.xml")) .await?; + } + + env.fake_release() + .await + .name("some_random_crate") + .create() + .await?; + env.fake_release() + .await + .name("some_random_crate_that_failed") + .build_result_failed() + .create() + .await?; + + // these fake crates appear only in the `s` sitemap + let response = web.get("/-/sitemap/s/sitemap.xml").await?; + assert!(response.status().is_success()); + + let content = response.text().await?; + assert!(content.contains("some_random_crate")); + assert!(!(content.contains("some_random_crate_that_failed"))); + + // and not in the others + for letter in ('a'..='z').filter(|&c| c != 's') { + let response = 
web.get(&format!("/-/sitemap/{letter}/sitemap.xml")).await?; - // these fake crates appear only in the `s` sitemap - let response = web.get("/-/sitemap/s/sitemap.xml").await?; assert!(response.status().is_success()); + assert!(!(response.text().await?.contains("some_random_crate"))); + } - let content = response.text().await?; - assert!(content.contains("some_random_crate")); - assert!(!(content.contains("some_random_crate_that_failed"))); + Ok(()) + } - // and not in the others - for letter in ('a'..='z').filter(|&c| c != 's') { - let response = web.get(&format!("/-/sitemap/{letter}/sitemap.xml")).await?; + #[tokio::test(flavor = "multi_thread")] + async fn sitemap_max_age() -> Result<()> { + let env = TestEnvironment::new().await?; + let web = env.web_app().await; + let db = env.pool()?; + + env.fake_release() + .await + .name("some_random_crate") + .release_time(Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap()) + .create() + .await?; + + sqlx::query!( + r#"UPDATE release_build_status + SET last_build_time = $1 + FROM releases + INNER JOIN crates ON crates.id = releases.crate_id + WHERE release_build_status.rid = releases.id + AND crates.name = $2"#, + Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap(), + "some_random_crate", + ) + .execute(&mut *db.get_async().await?) 
+ .await?; - assert!(response.status().is_success()); - assert!(!(response.text().await?.contains("some_random_crate"))); - } + let response = web.get("/-/sitemap/s/sitemap.xml").await?; + assert!(response.status().is_success()); - Ok(()) - }) + let content = response.text().await?; + assert!(content.contains("2022-08-28T00:00:00+00:00")); + Ok(()) } - #[test] - fn sitemap_max_age() { - async_wrapper(|env| async move { - let web = env.web_app().await; - - use chrono::{TimeZone, Utc}; - env.fake_release() - .await - .name("some_random_crate") - .release_time(Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap()) - .create() + #[tokio::test(flavor = "multi_thread")] + async fn sitemap_recent_dates() -> Result<()> { + let env = TestEnvironment::new().await?; + let web = env.web_app().await; + + let now = Utc::now(); + let today = now.date_naive().to_string(); + + env.fake_release() + .await + .name("recent_sitemap_crate") + .create() + .await?; + env.fake_release() + .await + .name("recent_sitemap_crate_failed") + .build_result_failed() + .create() + .await?; + + { + let response = web + .assert_success(&format!("/-/sitemap/recent/{today}/sitemap.xml")) .await?; - let response = web.get("/-/sitemap/s/sitemap.xml").await?; - assert!(response.status().is_success()); + let content = response.text().await?; + assert!(content.contains("recent_sitemap_crate")); + assert!(!content.contains("recent_sitemap_crate_failed")); + } + + { + let response = web + .assert_success("/-/sitemap/recent/1970-01-01/sitemap.xml") + .await?; let content = response.text().await?; - assert!(content.contains("2022-08-28T00:00:00+00:00")); - Ok(()) - }) + assert!(!content.contains("recent_sitemap_crate")); + assert!(!content.contains("recent_sitemap_crate_failed")); + } + + Ok(()) + } + + #[test_case("invalid-date")] + #[test_case("2024-13-40")] + #[tokio::test(flavor = "multi_thread")] + async fn sitemap_recent_invalid_dates(invalid_date: &str) -> Result<()> { + let env = 
TestEnvironment::new().await?; + + let web = env.web_app().await; + + assert_eq!( + web.get(&format!("/-/sitemap/recent/{invalid_date}/sitemap.xml")) + .await? + .status(), + StatusCode::BAD_REQUEST + ); + + Ok(()) } } diff --git a/crates/bin/docs_rs_web/src/routes.rs b/crates/bin/docs_rs_web/src/routes.rs index 8a01ec5c9..8fef55986 100644 --- a/crates/bin/docs_rs_web/src/routes.rs +++ b/crates/bin/docs_rs_web/src/routes.rs @@ -135,6 +135,10 @@ pub(crate) fn build_axum_routes() -> Result { get_static(|| async { cached_permanent_redirect("/-/static/opensearch.xml") }), ) .route_with_tsr("/sitemap.xml", get_internal(sitemap::sitemapindex_handler)) + .route_with_tsr( + "/-/sitemap/recent/{date}/sitemap.xml", + get_internal(sitemap::recent_sitemap_handler), + ) .route_with_tsr( "/-/sitemap/{letter}/sitemap.xml", get_internal(sitemap::sitemap_handler), diff --git a/crates/bin/docs_rs_web/templates/core/sitemap/_item.xml b/crates/bin/docs_rs_web/templates/core/sitemap/_item.xml index d3e56e9a3..f96fa9ecc 100644 --- a/crates/bin/docs_rs_web/templates/core/sitemap/_item.xml +++ b/crates/bin/docs_rs_web/templates/core/sitemap/_item.xml @@ -1,3 +1,4 @@ + {% let last_modified = last_modified() -%} https://docs.rs/{{ crate_name }}/latest/{{ target_name }}/ {{ last_modified|escape_xml }} diff --git a/crates/bin/docs_rs_web/templates/core/sitemap/index.xml b/crates/bin/docs_rs_web/templates/core/sitemap/index.xml index 6c5c88184..656b1a225 100644 --- a/crates/bin/docs_rs_web/templates/core/sitemap/index.xml +++ b/crates/bin/docs_rs_web/templates/core/sitemap/index.xml @@ -5,4 +5,9 @@ https://docs.rs/-/sitemap/{{ which }}/sitemap.xml {%- endfor %} + {% for which in recent_sitemaps -%} + + https://docs.rs/-/sitemap/recent/{{ which }}/sitemap.xml + + {%- endfor %}