From b0c1f9aa80ff3bbbb38d48c163f61a81f6117fd2 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Sat, 24 Sep 2022 10:24:11 +0200 Subject: [PATCH 01/12] new cache-policy & cache middleware structure to support full page caching --- src/config.rs | 2 - src/test/mod.rs | 93 ++++++++++++- src/web/builds.rs | 37 ++--- src/web/cache.rs | 170 +++++++++++++++++++++++ src/web/crate_details.rs | 55 ++++++-- src/web/file.rs | 21 ++- src/web/mod.rs | 7 + src/web/page/web_page.rs | 23 ++-- src/web/routes.rs | 11 +- src/web/rustdoc.rs | 289 ++++++++++++++++++++++++++------------- src/web/statics.rs | 100 ++++---------- 11 files changed, 564 insertions(+), 244 deletions(-) create mode 100644 src/web/cache.rs diff --git a/src/config.rs b/src/config.rs index 5e9d5a540..9a751b982 100644 --- a/src/config.rs +++ b/src/config.rs @@ -61,7 +61,6 @@ pub struct Config { // If both are absent, don't generate the header. If only one is present, // generate just that directive. Values are in seconds. pub(crate) cache_control_stale_while_revalidate: Option, - pub(crate) cache_control_max_age: Option, pub(crate) cdn_backend: CdnKind, @@ -145,7 +144,6 @@ impl Config { cache_control_stale_while_revalidate: maybe_env( "CACHE_CONTROL_STALE_WHILE_REVALIDATE", )?, - cache_control_max_age: maybe_env("CACHE_CONTROL_MAX_AGE")?, cdn_backend: env("DOCSRS_CDN_BACKEND", CdnKind::Dummy)?, diff --git a/src/test/mod.rs b/src/test/mod.rs index 3cfb2c995..0c76ca079 100644 --- a/src/test/mod.rs +++ b/src/test/mod.rs @@ -6,15 +6,16 @@ use crate::db::{Pool, PoolClient}; use crate::error::Result; use crate::repositories::RepositoryStatsUpdater; use crate::storage::{Storage, StorageKind}; -use crate::web::Server; +use crate::web::{cache, Server}; use crate::{BuildQueue, Config, Context, Index, Metrics}; use anyhow::Context as _; use fn_error_context::context; +use iron::headers::CacheControl; use log::error; use once_cell::unsync::OnceCell; use postgres::Client as Connection; use reqwest::{ - blocking::{Client, ClientBuilder, RequestBuilder}, + blocking::{Client, ClientBuilder, RequestBuilder, Response}, Method, }; use std::{fs, net::SocketAddr, panic, sync::Arc, time::Duration}; @@ -42,6 +43,31 @@ pub(crate) fn wrapper(f: impl FnOnce(&TestEnvironment) -> Result<()>) { } } +/// check a request if the cache control header matches NoCache +pub(crate) fn assert_no_cache(res: &Response) { + assert_eq!( + res.headers() + .get("Cache-Control") + .expect("missing cache-control header"), + cache::NO_CACHE, + ); +} + +/// check a request if the cache control header matches the given cache config. +pub(crate) fn assert_cache_control( + res: &Response, + cache_policy: cache::CachePolicy, + config: &Config, +) { + assert!(config.cache_control_stale_while_revalidate.is_some()); + assert_eq!( + res.headers() + .get("Cache-Control") + .expect("missing cache-control header"), + &CacheControl(cache_policy.render(config)).to_string() + ); +} + /// Make sure that a URL returns a status code between 200-299 pub(crate) fn assert_success(path: &str, web: &TestFrontend) -> Result<()> { let status = web.get(path).send()?.status(); @@ -49,14 +75,42 @@ pub(crate) fn assert_success(path: &str, web: &TestFrontend) -> Result<()> { Ok(()) } +/// Make sure that a URL returns a status code between 200-299, +/// also check the cache-control headers. +pub(crate) fn assert_success_cached( + path: &str, + web: &TestFrontend, + cache_policy: cache::CachePolicy, + config: &Config, +) -> Result<()> { + let response = web.get(path).send()?; + assert_cache_control(&response, cache_policy, config); + let status = response.status(); + assert!(status.is_success(), "failed to GET {}: {}", path, status); + Ok(()) +} + /// Make sure that a URL returns a 404 pub(crate) fn assert_not_found(path: &str, web: &TestFrontend) -> Result<()> { - let status = web.get(path).send()?.status(); - assert_eq!(status, 404, "GET {} should have been a 404", path); + let response = web.get(path).send()?; + + // for now, 404s should always have `no-cache` + assert_no_cache(&response); + + assert_eq!( + response.status(), + 404, + "GET {} should have been a 404", + path + ); Ok(()) } -fn assert_redirect_common(path: &str, expected_target: &str, web: &TestFrontend) -> Result<()> { +fn assert_redirect_common( + path: &str, + expected_target: &str, + web: &TestFrontend, +) -> Result { let response = web.get_no_redirect(path).send()?; let status = response.status(); if !status.is_redirection() { @@ -83,7 +137,7 @@ fn assert_redirect_common(path: &str, expected_target: &str, web: &TestFrontend) anyhow::bail!("got redirect to {redirect_target}"); } - Ok(()) + Ok(response) } /// Makes sure that a URL redirects to a specific page, but doesn't check that the target exists @@ -93,7 +147,7 @@ pub(crate) fn assert_redirect_unchecked( expected_target: &str, web: &TestFrontend, ) -> Result<()> { - assert_redirect_common(path, expected_target, web) + assert_redirect_common(path, expected_target, web).map(|_| ()) } /// Make sure that a URL redirects to a specific page, and that the target exists and is not another redirect @@ -110,6 +164,27 @@ pub(crate) fn assert_redirect(path: &str, expected_target: &str, web: &TestFront Ok(()) } +/// Make sure that a URL redirects to a specific page, and that the target exists and is not another redirect +#[context("expected redirect from {path} to {expected_target}")] +pub(crate) fn assert_redirect_cached( + path: &str, + expected_target: &str, + cache_policy: cache::CachePolicy, + web: &TestFrontend, + config: &Config, +) -> Result<()> { + let redirect_response = assert_redirect_common(path, expected_target, web)?; + assert_cache_control(&redirect_response, cache_policy, config); + + let response = web.get_no_redirect(expected_target).send()?; + let status = response.status(); + if !status.is_success() { + anyhow::bail!("failed to GET {expected_target}: {status}"); + } + + Ok(()) +} + pub(crate) struct TestEnvironment { build_queue: OnceCell>, config: OnceCell>, @@ -187,6 +262,10 @@ impl TestEnvironment { config.local_archive_cache_path = std::env::temp_dir().join(format!("docsrs-test-index-{}", rand::random::())); + // set stale content serving so Cache::ForeverOnlyInCdn and Cache::ForeverInCdnAndStaleInBrowser + // are actually different. + config.cache_control_stale_while_revalidate = Some(86400); + config } diff --git a/src/web/builds.rs b/src/web/builds.rs index d7f7e74cb..67fdb6710 100644 --- a/src/web/builds.rs +++ b/src/web/builds.rs @@ -1,4 +1,4 @@ -use super::{match_version, redirect_base, MatchSemver}; +use super::{cache::CachePolicy, match_version, redirect_base, MatchSemver}; use crate::{ db::Pool, docbuilder::Limits, @@ -7,9 +7,7 @@ use crate::{ }; use chrono::{DateTime, Utc}; use iron::{ - headers::{ - AccessControlAllowOrigin, CacheControl, CacheDirective, ContentType, Expires, HttpDate, - }, + headers::{AccessControlAllowOrigin, ContentType}, status, IronResult, Request, Response, Url, }; use router::Router; @@ -107,12 +105,8 @@ pub fn build_list_handler(req: &mut Request) -> IronResult { if is_json { let mut resp = Response::with((status::Ok, serde_json::to_string(&builds).unwrap())); resp.headers.set(ContentType::json()); - resp.headers.set(Expires(HttpDate(time::now()))); - resp.headers.set(CacheControl(vec![ - CacheDirective::NoCache, - CacheDirective::NoStore, - CacheDirective::MustRevalidate, - ])); + resp.extensions + .insert::(CachePolicy::NoStoreMustRevalidate); resp.headers.set(AccessControlAllowOrigin::Any); Ok(resp) @@ -131,7 +125,10 @@ pub fn build_list_handler(req: &mut Request) -> IronResult { #[cfg(test)] mod tests { - use crate::test::{wrapper, FakeBuild}; + use crate::{ + test::{assert_cache_control, wrapper, FakeBuild}, + web::cache::CachePolicy, + }; use chrono::{DateTime, Duration, Utc}; use kuchiki::traits::TendrilSink; use reqwest::StatusCode; @@ -156,12 +153,9 @@ mod tests { ]) .create()?; - let page = kuchiki::parse_html().one( - env.frontend() - .get("/crate/foo/0.1.0/builds") - .send()? - .text()?, - ); + let response = env.frontend().get("/crate/foo/0.1.0/builds").send()?; + assert_cache_control(&response, CachePolicy::NoCaching, &env.config()); + let page = kuchiki::parse_html().one(response.text()?); let rows: Vec<_> = page .select("ul > li a.release") @@ -200,12 +194,9 @@ mod tests { ]) .create()?; - let value: serde_json::Value = serde_json::from_str( - &env.frontend() - .get("/crate/foo/0.1.0/builds.json") - .send()? - .text()?, - )?; + let response = env.frontend().get("/crate/foo/0.1.0/builds.json").send()?; + assert_cache_control(&response, CachePolicy::NoStoreMustRevalidate, &env.config()); + let value: serde_json::Value = serde_json::from_str(&response.text()?)?; assert_eq!(value.pointer("/0/build_status"), Some(&true.into())); assert_eq!( diff --git a/src/web/cache.rs b/src/web/cache.rs new file mode 100644 index 000000000..33963fccd --- /dev/null +++ b/src/web/cache.rs @@ -0,0 +1,170 @@ +use super::STATIC_FILE_CACHE_DURATION; +use crate::config::Config; +use iron::{ + headers::{CacheControl, CacheDirective}, + AfterMiddleware, IronResult, Request, Response, +}; + +#[cfg(test)] +pub const NO_CACHE: &str = "no-cache, max-age=0"; + +/// defines the wanted caching behaviour for a web response. +pub enum CachePolicy { + /// no browser or CDN caching. + /// In some cases the browser might still use cached content, + /// for example when using the "back" button or when it can't + /// connect to the server. + NoCaching, + /// don't cache, plus + /// * enforce revalidation + /// * never store + NoStoreMustRevalidate, + /// cache forever in browser & CDN. + /// Valid when you have hashed / versioned filenames and every rebuild would + /// change the filename. + ForeverInCdnAndBrowser, + /// cache forever in CDN, but not in the browser. + /// Since we control the CDN we can actively purge content that is cached like + /// this, for example after building a crate. + /// Example usage: `/latest/` rustdoc pages and their redirects. + ForeverOnlyInCdn, + /// cache forver in the CDN, but allow stale content in the browser. + /// Example: rustdoc pages with the version in their URL. + /// A browser will show the stale content while getting the up-to-date + /// version from the origin server in the background. + /// This helps building a PWA. + ForeverInCdnAndStaleInBrowser, +} + +impl CachePolicy { + pub fn render(&self, config: &Config) -> Vec { + match *self { + CachePolicy::NoCaching => { + vec![CacheDirective::NoCache, CacheDirective::MaxAge(0)] + } + CachePolicy::NoStoreMustRevalidate => { + vec![ + CacheDirective::NoCache, + CacheDirective::NoStore, + CacheDirective::MustRevalidate, + CacheDirective::MaxAge(0), + ] + } + CachePolicy::ForeverInCdnAndBrowser => { + vec![ + CacheDirective::Public, + CacheDirective::MaxAge(STATIC_FILE_CACHE_DURATION as u32), + ] + } + CachePolicy::ForeverOnlyInCdn => { + // A missing `max-age` or `s-maxage` in the Cache-Control header will lead to + // CloudFront using the default TTL, while the browser not seeing any caching header. + // This means we can have the CDN caching the documentation while just + // issuing a purge after a build. + // https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/Expiration.html#ExpirationDownloadDist + vec![CacheDirective::Public] + } + CachePolicy::ForeverInCdnAndStaleInBrowser => { + let mut directives = CachePolicy::ForeverOnlyInCdn.render(config); + if let Some(seconds) = config.cache_control_stale_while_revalidate { + directives.push(CacheDirective::Extension( + "stale-while-revalidate".to_string(), + Some(seconds.to_string()), + )); + } + directives + } + } + } +} + +impl iron::typemap::Key for CachePolicy { + type Value = CachePolicy; +} + +/// Middleware to ensure a correct cache-control header. +/// The default is an explicit "never cache" header, which +/// can be adapted via: +/// ```ignore +/// resp.extensions.insert::(CachePolicy::ForeverOnlyInCdn); +/// # change Cache::ForeverOnlyInCdn into the cache polity you want to have +/// ``` +/// in a handler function. +pub(super) struct CacheMiddleware; + +impl AfterMiddleware for CacheMiddleware { + fn after(&self, req: &mut Request, mut res: Response) -> IronResult { + let config = req.extensions.get::().expect("missing config"); + let cache = res + .extensions + .get::() + .unwrap_or(&CachePolicy::NoCaching); + + if cfg!(test) { + // handlers should never set their own caching headers and + // only use the caching header templates above. + assert!(!res.headers.has::()); + } + + res.headers.set(CacheControl(cache.render(config))); + Ok(res) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test::wrapper; + use iron::headers::CacheControl; + use test_case::test_case; + + #[test_case(CachePolicy::NoCaching, "no-cache, max-age=0")] + #[test_case( + CachePolicy::NoStoreMustRevalidate, + "no-cache, no-store, must-revalidate, max-age=0" + )] + #[test_case(CachePolicy::ForeverInCdnAndBrowser, "public, max-age=31104000")] + #[test_case(CachePolicy::ForeverOnlyInCdn, "public")] + #[test_case( + CachePolicy::ForeverInCdnAndStaleInBrowser, + "public, stale-while-revalidate=86400" + )] + fn render(cache: CachePolicy, expected: &str) { + wrapper(|env| { + assert_eq!( + CacheControl(cache.render(&env.config())).to_string(), + expected + ); + Ok(()) + }); + } + + #[test] + fn render_stale_without_config() { + wrapper(|env| { + env.override_config(|config| config.cache_control_stale_while_revalidate = None); + + assert_eq!( + CacheControl(CachePolicy::ForeverInCdnAndStaleInBrowser.render(&env.config())) + .to_string(), + "public" + ); + Ok(()) + }); + } + #[test] + fn render_stale_with_config() { + wrapper(|env| { + env.override_config(|config| { + config.cache_control_stale_while_revalidate = Some(666); + }); + + assert_eq!( + CacheControl(CachePolicy::ForeverInCdnAndStaleInBrowser.render(&env.config())) + .to_string(), + "public, stale-while-revalidate=666" + ); + Ok(()) + }); + } +} diff --git a/src/web/crate_details.rs b/src/web/crate_details.rs index 2e8040fb8..4f2ed891c 100644 --- a/src/web/crate_details.rs +++ b/src/web/crate_details.rs @@ -1,6 +1,11 @@ use super::{match_version, redirect_base, render_markdown, MatchSemver, MetaData}; use crate::utils::{get_correct_docsrs_style_file, report_error}; -use crate::{db::Pool, impl_webpage, repositories::RepositoryStatsUpdater, web::page::WebPage}; +use crate::{ + db::Pool, + impl_webpage, + repositories::RepositoryStatsUpdater, + web::{cache::CachePolicy, page::WebPage}, +}; use anyhow::anyhow; use chrono::{DateTime, Utc}; use iron::prelude::*; @@ -303,16 +308,16 @@ pub fn crate_details_handler(req: &mut Request) -> IronResult { req, Url::parse(&format!("{}/crate/{}/latest", redirect_base(req), name,)), ); - return Ok(super::redirect(url)); + return Ok(super::cached_redirect(url, CachePolicy::ForeverOnlyInCdn)); } let mut conn = extension!(req, Pool).get()?; let found_version = match_version(&mut conn, name, req_version).and_then(|m| m.assume_exact())?; - let (version, version_or_latest) = match found_version { - MatchSemver::Exact((version, _)) => (version.clone(), version), - MatchSemver::Latest((version, _)) => (version, "latest".to_string()), + let (version, version_or_latest, is_latest_url) = match found_version { + MatchSemver::Exact((version, _)) => (version.clone(), version, false), + MatchSemver::Latest((version, _)) => (version, "latest".to_string(), true), MatchSemver::Semver((version, _)) => { let url = ctry!( req, @@ -324,7 +329,7 @@ pub fn crate_details_handler(req: &mut Request) -> IronResult { )), ); - return Ok(super::redirect(url)); + return Ok(super::cached_redirect(url, CachePolicy::ForeverOnlyInCdn)); } }; @@ -343,14 +348,20 @@ pub fn crate_details_handler(req: &mut Request) -> IronResult { ) ); - CrateDetailsPage { details }.into_response(req) + let mut res = CrateDetailsPage { details }.into_response(req)?; + res.extensions.insert::(if is_latest_url { + CachePolicy::ForeverOnlyInCdn + } else { + CachePolicy::ForeverInCdnAndStaleInBrowser + }); + Ok(res) } #[cfg(test)] mod tests { use super::*; use crate::index::api::CrateOwner; - use crate::test::{assert_redirect, wrapper, TestDatabase}; + use crate::test::{assert_cache_control, assert_redirect_cached, wrapper, TestDatabase}; use anyhow::{Context, Error}; use kuchiki::traits::TendrilSink; use std::collections::HashMap; @@ -581,11 +592,14 @@ mod tests { env.fake_release().name("foo").version("0.0.1").create()?; env.fake_release().name("foo").version("0.0.2").create()?; - let web = env.frontend(); + let response = env.frontend().get("/crate/foo/0.0.1").send()?; + assert_cache_control( + &response, + CachePolicy::ForeverInCdnAndStaleInBrowser, + &env.config(), + ); - assert!(web - .get("/crate/foo/0.0.1") - .send()? + assert!(response .text()? .contains("rel=\"canonical\" href=\"https://docs.rs/crate/foo/latest")); @@ -1013,6 +1027,7 @@ mod tests { let resp = env.frontend().get("/crate/dummy/latest").send()?; assert!(resp.status().is_success()); + assert_cache_control(&resp, CachePolicy::ForeverOnlyInCdn, &env.config()); assert!(resp.url().as_str().ends_with("/crate/dummy/latest")); let body = String::from_utf8(resp.bytes().unwrap().to_vec()).unwrap(); assert!(body.contains(" Response { - use iron::headers::{CacheControl, CacheDirective, ContentType, HttpDate, LastModified}; + use iron::headers::{ContentType, HttpDate, LastModified}; let mut response = Response::with((status::Ok, self.0.content)); - let cache = vec![ - CacheDirective::Public, - CacheDirective::MaxAge(super::STATIC_FILE_CACHE_DURATION as u32), - ]; response .headers .set(ContentType(self.0.mime.parse().unwrap())); - response.headers.set(CacheControl(cache)); + // FIXME: This is so horrible response.headers.set(LastModified(HttpDate( time::strptime( @@ -41,14 +38,18 @@ impl File { .unwrap(), ))); response + .extensions + .insert::(CachePolicy::ForeverInCdnAndBrowser); + response } } #[cfg(test)] mod tests { use super::*; - use crate::test::wrapper; + use crate::{test::wrapper, web::cache::CachePolicy}; use chrono::Utc; + use iron::headers::CacheControl; #[test] fn file_roundtrip() { @@ -66,6 +67,12 @@ mod tests { file.0.date_updated = now; let resp = file.serve(); + assert!(resp.headers.get::().is_none()); + let cache = resp + .extensions + .get::() + .expect("missing cache response extension"); + assert!(matches!(cache, CachePolicy::ForeverInCdnAndBrowser)); assert_eq!( resp.headers.get_raw("Last-Modified").unwrap(), [now.format("%a, %d %b %Y %T GMT").to_string().into_bytes()].as_ref(), diff --git a/src/web/mod.rs b/src/web/mod.rs index 5d709c472..15261b80f 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -74,6 +74,7 @@ macro_rules! extension { mod build_details; mod builds; +pub(crate) mod cache; pub(crate) mod crate_details; mod csp; mod error; @@ -128,6 +129,7 @@ impl MainHandler { chain.link_before(CspMiddleware); chain.link_after(CspMiddleware); + chain.link_after(cache::CacheMiddleware); chain } @@ -485,7 +487,12 @@ fn duration_to_str(init: DateTime) -> String { fn redirect(url: Url) -> Response { let mut resp = Response::with((status::Found, Redirect(url))); resp.headers.set(Expires(HttpDate(time::now()))); + resp +} +fn cached_redirect(url: Url, cache_policy: cache::CachePolicy) -> Response { + let mut resp = Response::with((status::Found, Redirect(url))); + resp.extensions.insert::(cache_policy); resp } diff --git a/src/web/page/web_page.rs b/src/web/page/web_page.rs index b34ba4f18..1a7f0e098 100644 --- a/src/web/page/web_page.rs +++ b/src/web/page/web_page.rs @@ -1,12 +1,9 @@ use super::TemplateData; -use crate::ctry; -use crate::web::csp::Csp; -use iron::{ - headers::{CacheControl, ContentType}, - response::Response, - status::Status, - IronResult, Request, +use crate::{ + ctry, + web::{cache::CachePolicy, csp::Csp}, }; +use iron::{headers::ContentType, response::Response, status::Status, IronResult, Request}; use serde::Serialize; use std::borrow::Cow; use tera::Context; @@ -81,7 +78,9 @@ pub trait WebPage: Serialize + Sized { let mut response = Response::with((status, rendered)); response.headers.set(Self::content_type()); - response.headers.set(Self::cache_control()); + if let Some(cache) = Self::cache_policy() { + response.extensions.insert::(cache); + } Ok(response) } @@ -99,8 +98,10 @@ pub trait WebPage: Serialize + Sized { ContentType::html() } - /// The contents of the Cache-Control header. Defaults to no caching. - fn cache_control() -> CacheControl { - CacheControl(vec![]) + /// caching for this page. + /// `None` leads to the default from the `CacheMiddleware` + /// being used. + fn cache_policy() -> Option { + None } } diff --git a/src/web/routes.rs b/src/web/routes.rs index 841377b1d..a2f0c33db 100644 --- a/src/web/routes.rs +++ b/src/web/routes.rs @@ -1,11 +1,8 @@ use crate::web::page::WebPage; -use super::metrics::RequestRecorder; +use super::{cache::CachePolicy, metrics::RequestRecorder}; use ::std::borrow::Cow; -use iron::{ - headers::{CacheControl, CacheDirective}, - middleware::Handler, -}; +use iron::middleware::Handler; use router::Router; use std::collections::HashSet; @@ -42,8 +39,8 @@ pub(super) fn build_routes() -> Routes { fn template(&self) -> Cow<'static, str> { "storage-change-detection.html".into() } - fn cache_control() -> CacheControl { - CacheControl(vec![CacheDirective::MaxAge(604800)]) + fn cache_policy() -> Option { + Some(CachePolicy::ForeverInCdnAndBrowser) } } fn storage_change_detection(req: &mut iron::Request) -> iron::IronResult { diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 7da82ae5e..940e55657 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -5,19 +5,16 @@ use crate::{ repositories::RepositoryStatsUpdater, utils, web::{ - crate_details::CrateDetails, csp::Csp, error::Nope, file::File, match_version, - metrics::RenderingTimesRecorder, parse_url_with_params, redirect_base, MatchSemver, - MetaData, + cache::CachePolicy, crate_details::CrateDetails, csp::Csp, error::Nope, file::File, + match_version, metrics::RenderingTimesRecorder, parse_url_with_params, redirect_base, + MatchSemver, MetaData, }, Config, Metrics, Storage, }; use anyhow::{anyhow, Context}; use iron::{ - headers::{CacheControl, CacheDirective, Expires, HttpDate}, - modifiers::Redirect, - status, - url::percent_encoding::percent_decode, - Handler, IronResult, Request, Response, Url, + modifiers::Redirect, status, url::percent_encoding::percent_decode, Handler, IronResult, + Request, Response, Url, }; use lol_html::errors::RewritingError; use once_cell::sync::Lazy; @@ -48,7 +45,7 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { fn redirect_to_doc( req: &Request, url_str: String, - permanent: bool, + cache_policy: CachePolicy, path_in_crate: Option<&str>, ) -> IronResult { let mut queries = BTreeMap::new(); @@ -57,14 +54,8 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { } queries.extend(req.url.as_ref().query_pairs()); let url = ctry!(req, parse_url_with_params(&url_str, queries)); - let (status_code, max_age) = if permanent { - (status::MovedPermanently, 86400) - } else { - (status::Found, 0) - }; - let mut resp = Response::with((status_code, Redirect(url))); - resp.headers - .set(CacheControl(vec![CacheDirective::MaxAge(max_age)])); + let mut resp = Response::with((status::Found, Redirect(url))); + resp.extensions.insert::(cache_policy); Ok(resp) } @@ -75,8 +66,8 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { ); let mut resp = Response::with((status::Found, Redirect(url))); - resp.headers.set(Expires(HttpDate(time::now()))); - + resp.extensions + .insert::(CachePolicy::ForeverOnlyInCdn); Ok(resp) } @@ -141,7 +132,12 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { if let Some(inner_path) = DOC_RUST_LANG_ORG_REDIRECTS.get(crate_name.as_str()) { let url = format!("https://doc.rust-lang.org/{inner_path}/"); - return redirect_to_doc(req, url, false, path_in_crate.as_deref()); + return redirect_to_doc( + req, + url, + CachePolicy::ForeverInCdnAndStaleInBrowser, + path_in_crate.as_deref(), + ); } let req_version = router.find("version"); @@ -193,7 +189,13 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { format!("{base}/{crate_name}/{version}/{target_name}/") }; - redirect_to_doc(req, url_str, version == "latest", path_in_crate.as_deref()) + let cache = if version == "latest" { + CachePolicy::ForeverOnlyInCdn + } else { + CachePolicy::ForeverInCdnAndStaleInBrowser + }; + + redirect_to_doc(req, url_str, cache, path_in_crate.as_deref()) } else { rendering_time.step("redirect to crate"); redirect_to_crate(req, &crate_name, &version) @@ -260,28 +262,11 @@ impl RustdocPage { let mut response = Response::with((Status::Ok, html)); response.headers.set(ContentType::html()); - - if is_latest_url { - response - .headers - .set(CacheControl(vec![CacheDirective::MaxAge(0)])); + response.extensions.insert::(if is_latest_url { + CachePolicy::ForeverOnlyInCdn } else { - let mut directives = vec![]; - if let Some(seconds) = config.cache_control_stale_while_revalidate { - directives.push(CacheDirective::Extension( - "stale-while-revalidate".to_string(), - Some(format!("{}", seconds)), - )); - } - - if let Some(seconds) = config.cache_control_max_age { - directives.push(CacheDirective::MaxAge(seconds)); - } - - if !directives.is_empty() { - response.headers.set(CacheControl(directives)); - } - } + CachePolicy::ForeverInCdnAndStaleInBrowser + }); Ok(response) } } @@ -319,7 +304,11 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { req_path.drain(..2).for_each(drop); // Convenience closure to allow for easy redirection - let redirect = |name: &str, vers: &str, path: &[&str]| -> IronResult { + let redirect = |name: &str, + vers: &str, + path: &[&str], + cache_policy: CachePolicy| + -> IronResult { // Format and parse the redirect url let redirect_path = format!( "{}/{}/{}/{}", @@ -330,7 +319,7 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { ); let url = ctry!(req, Url::parse(&redirect_path)); - Ok(super::redirect(url)) + Ok(super::cached_redirect(url, cache_policy)) }; rendering_time.step("match version"); @@ -343,23 +332,23 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { // * If there is a semver (but not exact) match, redirect to the exact version. let release_found = match_version(&mut conn, &name, url_version)?; - let (version, version_or_latest) = match release_found.version { + let (version, version_or_latest, is_latest_url) = match release_found.version { MatchSemver::Exact((version, _)) => { // Redirect when the requested crate name isn't correct if let Some(name) = release_found.corrected_name { - return redirect(&name, &version, &req_path); + return redirect(&name, &version, &req_path, CachePolicy::NoCaching); } - (version.clone(), version) + (version.clone(), version, false) } MatchSemver::Latest((version, _)) => { // Redirect when the requested crate name isn't correct if let Some(name) = release_found.corrected_name { - return redirect(&name, "latest", &req_path); + return redirect(&name, "latest", &req_path, CachePolicy::NoCaching); } - (version, "latest".to_string()) + (version, "latest".to_string(), true) } // Redirect when the requested version isn't correct @@ -367,7 +356,7 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { // to prevent cloudfront caching the wrong artifacts on URLs with loose semver // versions, redirect the browser to the returned version instead of loading it // immediately - return redirect(&name, &v, &req_path); + return redirect(&name, &v, &req_path, CachePolicy::ForeverOnlyInCdn); } }; @@ -394,7 +383,12 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { // if visiting the full path to the default target, remove the target from the path // expects a req_path that looks like `[/:target]/.*` if req_path.first().copied() == Some(&krate.metadata.default_target) { - return redirect(&name, &version_or_latest, &req_path[1..]); + return redirect( + &name, + &version_or_latest, + &req_path[1..], + CachePolicy::ForeverOnlyInCdn, + ); } // Create the path to access the file from @@ -429,7 +423,12 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { req, storage.rustdoc_file_exists(&name, &version, &path, krate.archive_storage) ) { - redirect(&name, &version_or_latest, &req_path) + redirect( + &name, + &version_or_latest, + &req_path, + CachePolicy::ForeverOnlyInCdn, + ) } else if req_path.first().map_or(false, |p| p.contains('-')) { // This is a target, not a module; it may not have been built. // Redirect to the default target and show a search page instead of a hard 404. @@ -437,6 +436,7 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { &format!("/crate/{}", name), &format!("{}/target-redirect", version), &req_path, + CachePolicy::ForeverOnlyInCdn, ) } else { Err(Nope::ResourceNotFound.into()) @@ -448,7 +448,19 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { if !path.ends_with(".html") { rendering_time.step("serve asset"); - return Ok(File(blob).serve()); + let mut response = File(blob).serve(); + // default asset caching behaviour is `Cache::ForeverInCdnAndBrowser`. + // We don't want to cache invocation specific assets in the browser + // but only in the CDN so we can invalidate these after a build. + // For CDN caching the same rules should apply as for HTML files, + // which means we cache slightly different for `/latest/` and + // URLs with versions. + response.extensions.insert::(if is_latest_url { + CachePolicy::ForeverOnlyInCdn + } else { + CachePolicy::ForeverInCdnAndStaleInBrowser + }); + return Ok(response); } rendering_time.step("find latest path"); @@ -549,7 +561,7 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { target, inner_path, is_latest_version, - is_latest_url: version_or_latest == "latest", + is_latest_url, is_prerelease, metadata: krate.metadata.clone(), krate, @@ -630,9 +642,9 @@ pub fn target_redirect_handler(req: &mut Request) -> IronResult { let release_found = match_version(&mut conn, name, Some(version))?; - let (version, version_or_latest) = match release_found.version { - MatchSemver::Exact((version, _)) => (version.clone(), version), - MatchSemver::Latest((version, _)) => (version, "latest".to_string()), + let (version, version_or_latest, is_latest_url) = match release_found.version { + MatchSemver::Exact((version, _)) => (version.clone(), version, false), + MatchSemver::Latest((version, _)) => (version, "latest".to_string(), true), // semver matching not supported here MatchSemver::Semver(_) => return Err(Nope::VersionNotFound.into()), }; @@ -688,8 +700,11 @@ pub fn target_redirect_handler(req: &mut Request) -> IronResult { let url = ctry!(req, Url::parse(&url)); let mut resp = Response::with((status::Found, Redirect(url))); - resp.headers.set(Expires(HttpDate(time::now()))); - + resp.extensions.insert::(if is_latest_url { + CachePolicy::ForeverOnlyInCdn + } else { + CachePolicy::ForeverInCdnAndStaleInBrowser + }); Ok(resp) } @@ -706,7 +721,10 @@ pub fn badge_handler(req: &mut Request) -> IronResult { let name = cexpect!(req, extension!(req, Router).find("crate")); let url = format!("https://img.shields.io/docsrs/{}/{}", name, version); let url = ctry!(req, Url::parse(&url)); - Ok(Response::with((status::MovedPermanently, Redirect(url)))) + let mut res = Response::with((status::MovedPermanently, Redirect(url))); + res.extensions + .insert::(CachePolicy::ForeverInCdnAndBrowser); + Ok(res) } /// Serves shared web resources used by rustdoc-generated documentation. @@ -741,7 +759,7 @@ impl Handler for SharedResourceHandler { #[cfg(test)] mod test { - use crate::test::*; + use crate::{test::*, web::cache::CachePolicy, Config}; use anyhow::Context; use kuchiki::traits::TendrilSink; use reqwest::{blocking::ClientBuilder, redirect, StatusCode}; @@ -751,9 +769,16 @@ mod test { fn try_latest_version_redirect( path: &str, web: &TestFrontend, + config: &Config, ) -> Result, anyhow::Error> { assert_success(path, web)?; - let data = web.get(path).send()?.text()?; + let response = web.get(path).send()?; + assert_cache_control( + &response, + CachePolicy::ForeverInCdnAndStaleInBrowser, + config, + ); + let data = response.text()?; log::info!("fetched path {} and got content {}\nhelp: if this is missing the header, remember to add ", path, data); let dom = kuchiki::parse_html().one(data); @@ -763,15 +788,19 @@ mod test { .next() { let link = elem.attributes.borrow().get("href").unwrap().to_string(); - assert_success(&link, web)?; + assert_success_cached(&link, web, CachePolicy::ForeverOnlyInCdn, config)?; Ok(Some(link)) } else { Ok(None) } } - fn latest_version_redirect(path: &str, web: &TestFrontend) -> Result { - try_latest_version_redirect(path, web)? + fn latest_version_redirect( + path: &str, + web: &TestFrontend, + config: &Config, + ) -> Result { + try_latest_version_redirect(path, web, config)? .with_context(|| anyhow::anyhow!("no redirect found for {}", path)) } @@ -799,14 +828,49 @@ mod test { .build_result_failed() .create()?; let web = env.frontend(); - assert_success("/", web)?; - assert_success("/crate/buggy/0.1.0/", web)?; - assert_success("/buggy/0.1.0/directory_1/index.html", web)?; - assert_success("/buggy/0.1.0/directory_2.html/index.html", web)?; - assert_success("/buggy/0.1.0/directory_3/.gitignore", web)?; - assert_success("/buggy/0.1.0/settings.html", web)?; - assert_success("/buggy/0.1.0/all.html", web)?; - assert_success("/buggy/0.1.0/directory_4/empty_file_no_ext", web)?; + assert_success_cached("/", web, CachePolicy::NoCaching, &env.config())?; + assert_success_cached( + "/crate/buggy/0.1.0/", + web, + CachePolicy::ForeverInCdnAndStaleInBrowser, + &env.config(), + )?; + assert_success_cached( + "/buggy/0.1.0/directory_1/index.html", + web, + CachePolicy::ForeverInCdnAndStaleInBrowser, + &env.config(), + )?; + assert_success_cached( + "/buggy/0.1.0/directory_2.html/index.html", + web, + CachePolicy::ForeverInCdnAndStaleInBrowser, + &env.config(), + )?; + assert_success_cached( + "/buggy/0.1.0/directory_3/.gitignore", + web, + CachePolicy::ForeverInCdnAndStaleInBrowser, + &env.config(), + )?; + assert_success_cached( + "/buggy/0.1.0/settings.html", + web, + CachePolicy::ForeverInCdnAndStaleInBrowser, + &env.config(), + )?; + assert_success_cached( + "/buggy/0.1.0/all.html", + web, + CachePolicy::ForeverInCdnAndStaleInBrowser, + &env.config(), + )?; + assert_success_cached( + "/buggy/0.1.0/directory_4/empty_file_no_ext", + web, + CachePolicy::ForeverInCdnAndStaleInBrowser, + &env.config(), + )?; Ok(()) }); } @@ -825,8 +889,19 @@ mod test { let web = env.frontend(); // no explicit default-target let base = "/dummy/0.1.0/dummy/"; - assert_success(base, web)?; - assert_redirect("/dummy/0.1.0/x86_64-unknown-linux-gnu/dummy/", base, web)?; + assert_success_cached( + base, + web, + CachePolicy::ForeverInCdnAndStaleInBrowser, + &env.config(), + )?; + assert_redirect_cached( + "/dummy/0.1.0/x86_64-unknown-linux-gnu/dummy/", + base, + CachePolicy::ForeverOnlyInCdn, + web, + &env.config(), + )?; assert_success("/dummy/latest/dummy/", web)?; @@ -879,6 +954,7 @@ mod test { .create()?; let resp = env.frontend().get("/dummy/latest/dummy/").send()?; + assert_cache_control(&resp, CachePolicy::ForeverOnlyInCdn, &env.config()); assert!(resp.url().as_str().ends_with("/dummy/latest/dummy/")); let body = String::from_utf8(resp.bytes().unwrap().to_vec()).unwrap(); assert!(body.contains(" {})", last_url, current_url); assert_eq!(response.status(), StatusCode::MOVED_PERMANENTLY); + assert_cache_control( + &response, + CachePolicy::ForeverInCdnAndBrowser, + &env.config(), + ); assert_eq!( current_url.as_str(), "https://img.shields.io/docsrs/zstd/latest" @@ -1343,7 +1435,6 @@ mod test { }) .collect()) } - fn assert_platform_links( web: &TestFrontend, path: &str, @@ -1676,7 +1767,7 @@ mod test { } #[test] - fn test_redirect_to_latest_301() { + fn test_redirect_to_latest_302() { wrapper(|env| { env.fake_release().name("dummy").version("1.0.0").create()?; let web = env.frontend(); @@ -1686,10 +1777,10 @@ mod test { .unwrap(); let url = format!("http://{}/dummy", web.server_addr()); let resp = client.get(url).send()?; - assert_eq!(resp.status(), StatusCode::MOVED_PERMANENTLY); + assert_eq!(resp.status(), StatusCode::FOUND); assert_eq!( resp.headers().get("Cache-Control").unwrap(), - reqwest::header::HeaderValue::from_str("max-age=86400").unwrap() + reqwest::header::HeaderValue::from_str("public").unwrap() ); assert!(resp .headers() @@ -1929,7 +2020,8 @@ mod test { assert_eq!( latest_version_redirect( "/tungstenite/0.10.0/tungstenite/?search=String%20-%3E%20Message", - env.frontend() + env.frontend(), + &env.config() )?, "/crate/tungstenite/latest/target-redirect/x86_64-unknown-linux-gnu/tungstenite/index.html?search=String%20-%3E%20Message", ); @@ -1952,7 +2044,8 @@ mod test { assert_eq!( latest_version_redirect( "/pyo3/0.2.7/src/pyo3/objects/exc.rs.html", - env.frontend() + env.frontend(), + &env.config(), )?, target_redirect ); diff --git a/src/web/statics.rs b/src/web/statics.rs index 7f2522c51..fa61c5fe5 100644 --- a/src/web/statics.rs +++ b/src/web/statics.rs @@ -1,10 +1,9 @@ -use super::{error::Nope, redirect, redirect_base, STATIC_FILE_CACHE_DURATION}; +use super::{cache::CachePolicy, error::Nope, redirect, redirect_base}; use crate::utils::report_error; use anyhow::Context; use chrono::prelude::*; use iron::{ - headers::CacheDirective, - headers::{CacheControl, ContentLength, ContentType, LastModified}, + headers::{ContentLength, ContentType, LastModified}, status::Status, IronResult, Request, Response, Url, }; @@ -99,11 +98,9 @@ where { let mut response = Response::with((Status::Ok, resource.as_ref())); - let cache = vec![ - CacheDirective::Public, - CacheDirective::MaxAge(STATIC_FILE_CACHE_DURATION as u32), - ]; - response.headers.set(CacheControl(cache)); + response + .extensions + .insert::(CachePolicy::ForeverInCdnAndBrowser); response .headers @@ -145,8 +142,13 @@ mod tests { use iron::status::Status; use super::{serve_file, STATIC_SEARCH_PATHS, STYLE_CSS, VENDORED_CSS}; - use crate::test::wrapper; + use crate::{ + test::{assert_cache_control, wrapper}, + web::cache::CachePolicy, + }; + use reqwest::StatusCode; use std::fs; + use test_case::test_case; #[test] fn style_css() { @@ -155,6 +157,7 @@ mod tests { let resp = web.get("/-/static/style.css").send()?; assert!(resp.status().is_success()); + assert_cache_control(&resp, CachePolicy::ForeverInCdnAndBrowser, &env.config()); assert_eq!( resp.headers().get("Content-Type"), Some(&"text/css".parse().unwrap()), @@ -173,6 +176,7 @@ mod tests { let resp = web.get("/-/static/vendored.css").send()?; assert!(resp.status().is_success()); + assert_cache_control(&resp, CachePolicy::ForeverInCdnAndBrowser, &env.config()); assert_eq!( resp.headers().get("Content-Type"), Some(&"text/css".parse().unwrap()), @@ -184,73 +188,23 @@ mod tests { }); } - #[test] - fn index_js() { - wrapper(|env| { - let web = env.frontend(); - - let resp = web.get("/-/static/index.js").send()?; - assert!(resp.status().is_success()); - assert_eq!( - resp.headers().get("Content-Type"), - Some(&"application/javascript".parse().unwrap()), - ); - assert!(resp.content_length().unwrap() > 10); - assert!(resp.text()?.contains("copyTextHandler")); - - Ok(()) - }); - } - - #[test] - fn menu_js() { - wrapper(|env| { - let web = env.frontend(); - - let resp = web.get("/-/static/menu.js").send()?; - assert!(resp.status().is_success()); - assert_eq!( - resp.headers().get("Content-Type"), - Some(&"application/javascript".parse().unwrap()), - ); - assert!(resp.content_length().unwrap() > 10); - assert!(resp.text()?.contains("closeMenu")); - - Ok(()) - }); - } - - #[test] - fn keyboard_js() { + #[test_case("/-/static/index.js", "copyTextHandler")] + #[test_case("/-/static/menu.js", "closeMenu")] + #[test_case("/-/static/keyboard.js", "handleKey")] + #[test_case("/-/static/source.js", "toggleSource")] + fn js_content(path: &str, expected_content: &str) { wrapper(|env| { let web = env.frontend(); - let resp = web.get("/-/static/keyboard.js").send()?; + let resp = web.get(path).send()?; assert!(resp.status().is_success()); + assert_cache_control(&resp, CachePolicy::ForeverInCdnAndBrowser, &env.config()); assert_eq!( resp.headers().get("Content-Type"), Some(&"application/javascript".parse().unwrap()), ); assert!(resp.content_length().unwrap() > 10); - assert!(resp.text()?.contains("handleKey")); - - Ok(()) - }); - } - - #[test] - fn source_js() { - wrapper(|env| { - let web = env.frontend(); - - let resp = web.get("/-/static/source.js").send()?; - assert!(resp.status().is_success()); - assert_eq!( - resp.headers().get("Content-Type"), - Some(&"application/javascript".parse().unwrap()), - ); - assert!(resp.content_length().unwrap() > 10); - assert!(resp.text()?.contains("toggleSource")); + assert!(resp.text()?.contains(expected_content)); Ok(()) }); @@ -274,6 +228,7 @@ mod tests { let resp = web.get(&url).send()?; assert!(resp.status().is_success(), "failed to fetch {:?}", url); + assert_cache_control(&resp, CachePolicy::ForeverInCdnAndBrowser, &env.config()); assert_eq!( resp.bytes()?, fs::read(&path).unwrap(), @@ -290,14 +245,9 @@ mod tests { #[test] fn static_file_that_doesnt_exist() { wrapper(|env| { - let web = env.frontend(); - assert_eq!( - web.get("/-/static/whoop-de-do.png") - .send()? - .status() - .as_u16(), - 404, - ); + let response = env.frontend().get("/-/static/whoop-de-do.png").send()?; + assert_cache_control(&response, CachePolicy::NoCaching, &env.config()); + assert_eq!(response.status(), StatusCode::NOT_FOUND); Ok(()) }); From 2f39aff56b9f988bed7db42563bb161e81f787e0 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Mon, 26 Sep 2022 19:34:38 +0200 Subject: [PATCH 02/12] fix doc comment for assert_redirect_cached --- src/test/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/mod.rs b/src/test/mod.rs index 0c76ca079..ff493d5c8 100644 --- a/src/test/mod.rs +++ b/src/test/mod.rs @@ -164,7 +164,8 @@ pub(crate) fn assert_redirect(path: &str, expected_target: &str, web: &TestFront Ok(()) } -/// Make sure that a URL redirects to a specific page, and that the target exists and is not another redirect +/// Make sure that a URL redirects to a specific page, and that the target exists and is not another redirect. +/// Also verifies that the redirect's cache-control header matches the provided cache policy. #[context("expected redirect from {path} to {expected_target}")] pub(crate) fn assert_redirect_cached( path: &str, From 92a393baf69d9f6ae46a150fbf9ac00539767001 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Mon, 26 Sep 2022 19:42:31 +0200 Subject: [PATCH 03/12] change NoCache policy to just `max-age=0` to allow back-forward cache in the browser --- src/web/cache.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/web/cache.rs b/src/web/cache.rs index 33963fccd..52cb7f5c7 100644 --- a/src/web/cache.rs +++ b/src/web/cache.rs @@ -6,7 +6,7 @@ use iron::{ }; #[cfg(test)] -pub const NO_CACHE: &str = "no-cache, max-age=0"; +pub const NO_CACHE: &str = "max-age=0"; /// defines the wanted caching behaviour for a web response. pub enum CachePolicy { @@ -40,7 +40,7 @@ impl CachePolicy { pub fn render(&self, config: &Config) -> Vec { match *self { CachePolicy::NoCaching => { - vec![CacheDirective::NoCache, CacheDirective::MaxAge(0)] + vec![CacheDirective::MaxAge(0)] } CachePolicy::NoStoreMustRevalidate => { vec![ @@ -118,7 +118,7 @@ mod tests { use iron::headers::CacheControl; use test_case::test_case; - #[test_case(CachePolicy::NoCaching, "no-cache, max-age=0")] + #[test_case(CachePolicy::NoCaching, "max-age=0")] #[test_case( CachePolicy::NoStoreMustRevalidate, "no-cache, no-store, must-revalidate, max-age=0" From 570e74b0a9d5a869ab8adf0ffc08d710dfb08406 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Mon, 26 Sep 2022 19:48:54 +0200 Subject: [PATCH 04/12] set assert! message when misusing cache control --- src/web/cache.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/web/cache.rs b/src/web/cache.rs index 52cb7f5c7..ec1f3c9e8 100644 --- a/src/web/cache.rs +++ b/src/web/cache.rs @@ -101,9 +101,10 @@ impl AfterMiddleware for CacheMiddleware { .unwrap_or(&CachePolicy::NoCaching); if cfg!(test) { - // handlers should never set their own caching headers and - // only use the caching header templates above. - assert!(!res.headers.has::()); + assert!( + !res.headers.has::(), + "handlers should never set their own caching headers and only use CachePolicy to control caching." + ); } res.headers.set(CacheControl(cache.render(config))); From af124e6423e49a028a19a9dc7e0dfb547f01f9d3 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Mon, 26 Sep 2022 19:53:18 +0200 Subject: [PATCH 05/12] update comment wording --- src/web/rustdoc.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 940e55657..c164e3c03 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -450,8 +450,8 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { let mut response = File(blob).serve(); // default asset caching behaviour is `Cache::ForeverInCdnAndBrowser`. - // We don't want to cache invocation specific assets in the browser - // but only in the CDN so we can invalidate these after a build. + // We want invocation specific assets cached in the CDN but not in the browser. + // That way we can invalidate the CDN cache after a build. // For CDN caching the same rules should apply as for HTML files, // which means we cache slightly different for `/latest/` and // URLs with versions. From 5853ffab2fb4643dcb5becdae158c22bb62651ab Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Mon, 26 Sep 2022 19:58:46 +0200 Subject: [PATCH 06/12] fix rustfmt error --- src/web/cache.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/web/cache.rs b/src/web/cache.rs index ec1f3c9e8..f469ad70c 100644 --- a/src/web/cache.rs +++ b/src/web/cache.rs @@ -102,7 +102,7 @@ impl AfterMiddleware for CacheMiddleware { if cfg!(test) { assert!( - !res.headers.has::(), + !res.headers.has::(), "handlers should never set their own caching headers and only use CachePolicy to control caching." ); } From 3e3c2577e5f236aba6a4c5f91c4d89dec87eef4f Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Mon, 26 Sep 2022 21:20:50 +0200 Subject: [PATCH 07/12] rename CachePolicy::ForeverOnlyInCdn into ForeverInCdn --- src/test/mod.rs | 2 +- src/web/cache.rs | 12 ++++++------ src/web/crate_details.rs | 10 +++++----- src/web/rustdoc.rs | 26 +++++++++++++------------- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/test/mod.rs b/src/test/mod.rs index ff493d5c8..3b1c8e638 100644 --- a/src/test/mod.rs +++ b/src/test/mod.rs @@ -263,7 +263,7 @@ impl TestEnvironment { config.local_archive_cache_path = std::env::temp_dir().join(format!("docsrs-test-index-{}", rand::random::())); - // set stale content serving so Cache::ForeverOnlyInCdn and Cache::ForeverInCdnAndStaleInBrowser + // set stale content serving so Cache::ForeverInCdn and Cache::ForeverInCdnAndStaleInBrowser // are actually different. config.cache_control_stale_while_revalidate = Some(86400); diff --git a/src/web/cache.rs b/src/web/cache.rs index f469ad70c..3631ce583 100644 --- a/src/web/cache.rs +++ b/src/web/cache.rs @@ -27,7 +27,7 @@ pub enum CachePolicy { /// Since we control the CDN we can actively purge content that is cached like /// this, for example after building a crate. /// Example usage: `/latest/` rustdoc pages and their redirects. - ForeverOnlyInCdn, + ForeverInCdn, /// cache forver in the CDN, but allow stale content in the browser. /// Example: rustdoc pages with the version in their URL. /// A browser will show the stale content while getting the up-to-date @@ -56,7 +56,7 @@ impl CachePolicy { CacheDirective::MaxAge(STATIC_FILE_CACHE_DURATION as u32), ] } - CachePolicy::ForeverOnlyInCdn => { + CachePolicy::ForeverInCdn => { // A missing `max-age` or `s-maxage` in the Cache-Control header will lead to // CloudFront using the default TTL, while the browser not seeing any caching header. // This means we can have the CDN caching the documentation while just @@ -65,7 +65,7 @@ impl CachePolicy { vec![CacheDirective::Public] } CachePolicy::ForeverInCdnAndStaleInBrowser => { - let mut directives = CachePolicy::ForeverOnlyInCdn.render(config); + let mut directives = CachePolicy::ForeverInCdn.render(config); if let Some(seconds) = config.cache_control_stale_while_revalidate { directives.push(CacheDirective::Extension( "stale-while-revalidate".to_string(), @@ -86,8 +86,8 @@ impl iron::typemap::Key for CachePolicy { /// The default is an explicit "never cache" header, which /// can be adapted via: /// ```ignore -/// resp.extensions.insert::(CachePolicy::ForeverOnlyInCdn); -/// # change Cache::ForeverOnlyInCdn into the cache polity you want to have +/// resp.extensions.insert::(CachePolicy::ForeverInCdn); +/// # change Cache::ForeverInCdn into the cache polity you want to have /// ``` /// in a handler function. pub(super) struct CacheMiddleware; @@ -125,7 +125,7 @@ mod tests { "no-cache, no-store, must-revalidate, max-age=0" )] #[test_case(CachePolicy::ForeverInCdnAndBrowser, "public, max-age=31104000")] - #[test_case(CachePolicy::ForeverOnlyInCdn, "public")] + #[test_case(CachePolicy::ForeverInCdn, "public")] #[test_case( CachePolicy::ForeverInCdnAndStaleInBrowser, "public, stale-while-revalidate=86400" diff --git a/src/web/crate_details.rs b/src/web/crate_details.rs index 4f2ed891c..169575916 100644 --- a/src/web/crate_details.rs +++ b/src/web/crate_details.rs @@ -308,7 +308,7 @@ pub fn crate_details_handler(req: &mut Request) -> IronResult { req, Url::parse(&format!("{}/crate/{}/latest", redirect_base(req), name,)), ); - return Ok(super::cached_redirect(url, CachePolicy::ForeverOnlyInCdn)); + return Ok(super::cached_redirect(url, CachePolicy::ForeverInCdn)); } let mut conn = extension!(req, Pool).get()?; @@ -329,7 +329,7 @@ pub fn crate_details_handler(req: &mut Request) -> IronResult { )), ); - return Ok(super::cached_redirect(url, CachePolicy::ForeverOnlyInCdn)); + return Ok(super::cached_redirect(url, CachePolicy::ForeverInCdn)); } }; @@ -350,7 +350,7 @@ pub fn crate_details_handler(req: &mut Request) -> IronResult { let mut res = CrateDetailsPage { details }.into_response(req)?; res.extensions.insert::(if is_latest_url { - CachePolicy::ForeverOnlyInCdn + CachePolicy::ForeverInCdn } else { CachePolicy::ForeverInCdnAndStaleInBrowser }); @@ -1027,7 +1027,7 @@ mod tests { let resp = env.frontend().get("/crate/dummy/latest").send()?; assert!(resp.status().is_success()); - assert_cache_control(&resp, CachePolicy::ForeverOnlyInCdn, &env.config()); + assert_cache_control(&resp, CachePolicy::ForeverInCdn, &env.config()); assert!(resp.url().as_str().ends_with("/crate/dummy/latest")); let body = String::from_utf8(resp.bytes().unwrap().to_vec()).unwrap(); assert!(body.contains(" IronResult { let mut resp = Response::with((status::Found, Redirect(url))); resp.extensions - .insert::(CachePolicy::ForeverOnlyInCdn); + .insert::(CachePolicy::ForeverInCdn); Ok(resp) } @@ -190,7 +190,7 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { }; let cache = if version == "latest" { - CachePolicy::ForeverOnlyInCdn + CachePolicy::ForeverInCdn } else { CachePolicy::ForeverInCdnAndStaleInBrowser }; @@ -263,7 +263,7 @@ impl RustdocPage { let mut response = Response::with((Status::Ok, html)); response.headers.set(ContentType::html()); response.extensions.insert::(if is_latest_url { - CachePolicy::ForeverOnlyInCdn + CachePolicy::ForeverInCdn } else { CachePolicy::ForeverInCdnAndStaleInBrowser }); @@ -356,7 +356,7 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { // to prevent cloudfront caching the wrong artifacts on URLs with loose semver // versions, redirect the browser to the returned version instead of loading it // immediately - return redirect(&name, &v, &req_path, CachePolicy::ForeverOnlyInCdn); + return redirect(&name, &v, &req_path, CachePolicy::ForeverInCdn); } }; @@ -387,7 +387,7 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { &name, &version_or_latest, &req_path[1..], - CachePolicy::ForeverOnlyInCdn, + CachePolicy::ForeverInCdn, ); } @@ -427,7 +427,7 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { &name, &version_or_latest, &req_path, - CachePolicy::ForeverOnlyInCdn, + CachePolicy::ForeverInCdn, ) } else if req_path.first().map_or(false, |p| p.contains('-')) { // This is a target, not a module; it may not have been built. @@ -436,7 +436,7 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { &format!("/crate/{}", name), &format!("{}/target-redirect", version), &req_path, - CachePolicy::ForeverOnlyInCdn, + CachePolicy::ForeverInCdn, ) } else { Err(Nope::ResourceNotFound.into()) @@ -456,7 +456,7 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { // which means we cache slightly different for `/latest/` and // URLs with versions. response.extensions.insert::(if is_latest_url { - CachePolicy::ForeverOnlyInCdn + CachePolicy::ForeverInCdn } else { CachePolicy::ForeverInCdnAndStaleInBrowser }); @@ -701,7 +701,7 @@ pub fn target_redirect_handler(req: &mut Request) -> IronResult { let url = ctry!(req, Url::parse(&url)); let mut resp = Response::with((status::Found, Redirect(url))); resp.extensions.insert::(if is_latest_url { - CachePolicy::ForeverOnlyInCdn + CachePolicy::ForeverInCdn } else { CachePolicy::ForeverInCdnAndStaleInBrowser }); @@ -788,7 +788,7 @@ mod test { .next() { let link = elem.attributes.borrow().get("href").unwrap().to_string(); - assert_success_cached(&link, web, CachePolicy::ForeverOnlyInCdn, config)?; + assert_success_cached(&link, web, CachePolicy::ForeverInCdn, config)?; Ok(Some(link)) } else { Ok(None) @@ -898,7 +898,7 @@ mod test { assert_redirect_cached( "/dummy/0.1.0/x86_64-unknown-linux-gnu/dummy/", base, - CachePolicy::ForeverOnlyInCdn, + CachePolicy::ForeverInCdn, web, &env.config(), )?; @@ -954,7 +954,7 @@ mod test { .create()?; let resp = env.frontend().get("/dummy/latest/dummy/").send()?; - assert_cache_control(&resp, CachePolicy::ForeverOnlyInCdn, &env.config()); + assert_cache_control(&resp, CachePolicy::ForeverInCdn, &env.config()); assert!(resp.url().as_str().ends_with("/dummy/latest/dummy/")); let body = String::from_utf8(resp.bytes().unwrap().to_vec()).unwrap(); assert!(body.contains(" Date: Mon, 26 Sep 2022 21:34:19 +0200 Subject: [PATCH 08/12] revert permanent redirect to match current logic in rustdoc redirects --- src/web/rustdoc.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 3c6d51862..4ea0001ef 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -54,7 +54,12 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { } queries.extend(req.url.as_ref().query_pairs()); let url = ctry!(req, parse_url_with_params(&url_str, queries)); - let mut resp = Response::with((status::Found, Redirect(url))); + let status = if matches!(cache_policy, CachePolicy::ForeverInCdnAndBrowser) { + status::MovedPermanently + } else { + status::Found + }; + let mut resp = Response::with((status, Redirect(url))); resp.extensions.insert::(cache_policy); Ok(resp) } @@ -190,7 +195,7 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { }; let cache = if version == "latest" { - CachePolicy::ForeverInCdn + CachePolicy::ForeverInCdnAndBrowser } else { CachePolicy::ForeverInCdnAndStaleInBrowser }; @@ -1767,7 +1772,7 @@ mod test { } #[test] - fn test_redirect_to_latest_302() { + fn test_redirect_to_latest_301() { wrapper(|env| { env.fake_release().name("dummy").version("1.0.0").create()?; let web = env.frontend(); @@ -1777,10 +1782,10 @@ mod test { .unwrap(); let url = format!("http://{}/dummy", web.server_addr()); let resp = client.get(url).send()?; - assert_eq!(resp.status(), StatusCode::FOUND); + assert_eq!(resp.status(), StatusCode::MOVED_PERMANENTLY); assert_eq!( resp.headers().get("Cache-Control").unwrap(), - reqwest::header::HeaderValue::from_str("public").unwrap() + reqwest::header::HeaderValue::from_str("public, max-age=31104000").unwrap() ); assert!(resp .headers() From a9a180284a39c090baddfa172b44387497e0401d Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Tue, 27 Sep 2022 05:30:21 +0200 Subject: [PATCH 09/12] Revert "revert permanent redirect to match current logic in rustdoc redirects" This reverts commit cdedb6583b8c6ccea1b19f7550b542ef9437e5e0. --- src/web/rustdoc.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 4ea0001ef..3c6d51862 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -54,12 +54,7 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { } queries.extend(req.url.as_ref().query_pairs()); let url = ctry!(req, parse_url_with_params(&url_str, queries)); - let status = if matches!(cache_policy, CachePolicy::ForeverInCdnAndBrowser) { - status::MovedPermanently - } else { - status::Found - }; - let mut resp = Response::with((status, Redirect(url))); + let mut resp = Response::with((status::Found, Redirect(url))); resp.extensions.insert::(cache_policy); Ok(resp) } @@ -195,7 +190,7 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { }; let cache = if version == "latest" { - CachePolicy::ForeverInCdnAndBrowser + CachePolicy::ForeverInCdn } else { CachePolicy::ForeverInCdnAndStaleInBrowser }; @@ -1772,7 +1767,7 @@ mod test { } #[test] - fn test_redirect_to_latest_301() { + fn test_redirect_to_latest_302() { wrapper(|env| { env.fake_release().name("dummy").version("1.0.0").create()?; let web = env.frontend(); @@ -1782,10 +1777,10 @@ mod test { .unwrap(); let url = format!("http://{}/dummy", web.server_addr()); let resp = client.get(url).send()?; - assert_eq!(resp.status(), StatusCode::MOVED_PERMANENTLY); + assert_eq!(resp.status(), StatusCode::FOUND); assert_eq!( resp.headers().get("Cache-Control").unwrap(), - reqwest::header::HeaderValue::from_str("public, max-age=31104000").unwrap() + reqwest::header::HeaderValue::from_str("public").unwrap() ); assert!(resp .headers() From e5cdb8cc2569b7964d69cb7bfb68d8fb1b9ef03a Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Tue, 27 Sep 2022 05:39:54 +0200 Subject: [PATCH 10/12] remove 'public' from cache-control headers --- src/web/cache.rs | 17 +++++++---------- src/web/rustdoc.rs | 5 +---- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/src/web/cache.rs b/src/web/cache.rs index 3631ce583..28183dc34 100644 --- a/src/web/cache.rs +++ b/src/web/cache.rs @@ -51,10 +51,7 @@ impl CachePolicy { ] } CachePolicy::ForeverInCdnAndBrowser => { - vec![ - CacheDirective::Public, - CacheDirective::MaxAge(STATIC_FILE_CACHE_DURATION as u32), - ] + vec![CacheDirective::MaxAge(STATIC_FILE_CACHE_DURATION as u32)] } CachePolicy::ForeverInCdn => { // A missing `max-age` or `s-maxage` in the Cache-Control header will lead to @@ -62,7 +59,7 @@ impl CachePolicy { // This means we can have the CDN caching the documentation while just // issuing a purge after a build. // https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/Expiration.html#ExpirationDownloadDist - vec![CacheDirective::Public] + vec![] } CachePolicy::ForeverInCdnAndStaleInBrowser => { let mut directives = CachePolicy::ForeverInCdn.render(config); @@ -124,11 +121,11 @@ mod tests { CachePolicy::NoStoreMustRevalidate, "no-cache, no-store, must-revalidate, max-age=0" )] - #[test_case(CachePolicy::ForeverInCdnAndBrowser, "public, max-age=31104000")] - #[test_case(CachePolicy::ForeverInCdn, "public")] + #[test_case(CachePolicy::ForeverInCdnAndBrowser, "max-age=31104000")] + #[test_case(CachePolicy::ForeverInCdn, "")] #[test_case( CachePolicy::ForeverInCdnAndStaleInBrowser, - "public, stale-while-revalidate=86400" + "stale-while-revalidate=86400" )] fn render(cache: CachePolicy, expected: &str) { wrapper(|env| { @@ -148,7 +145,7 @@ mod tests { assert_eq!( CacheControl(CachePolicy::ForeverInCdnAndStaleInBrowser.render(&env.config())) .to_string(), - "public" + "" ); Ok(()) }); @@ -163,7 +160,7 @@ mod tests { assert_eq!( CacheControl(CachePolicy::ForeverInCdnAndStaleInBrowser.render(&env.config())) .to_string(), - "public, stale-while-revalidate=666" + "stale-while-revalidate=666" ); Ok(()) }); diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 3c6d51862..a454ea05f 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -1778,10 +1778,7 @@ mod test { let url = format!("http://{}/dummy", web.server_addr()); let resp = client.get(url).send()?; assert_eq!(resp.status(), StatusCode::FOUND); - assert_eq!( - resp.headers().get("Cache-Control").unwrap(), - reqwest::header::HeaderValue::from_str("public").unwrap() - ); + assert!(resp.headers().get("Cache-Control").unwrap().is_empty()); assert!(resp .headers() .get("Location") From 050b967875acb26785e7ad10040930b7b031430f Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Tue, 27 Sep 2022 05:49:52 +0200 Subject: [PATCH 11/12] revert cache settings for invocation specific rustdoc assets --- src/web/rustdoc.rs | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index a454ea05f..16364cdf2 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -448,19 +448,10 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { if !path.ends_with(".html") { rendering_time.step("serve asset"); - let mut response = File(blob).serve(); // default asset caching behaviour is `Cache::ForeverInCdnAndBrowser`. - // We want invocation specific assets cached in the CDN but not in the browser. - // That way we can invalidate the CDN cache after a build. - // For CDN caching the same rules should apply as for HTML files, - // which means we cache slightly different for `/latest/` and - // URLs with versions. - response.extensions.insert::(if is_latest_url { - CachePolicy::ForeverInCdn - } else { - CachePolicy::ForeverInCdnAndStaleInBrowser - }); - return Ok(response); + // This is an edge-case when we serve invocation specific static assets under `/latest/`: + // https://github.com/rust-lang/docs.rs/issues/1593 + return Ok(File(blob).serve()); } rendering_time.step("find latest path"); @@ -850,7 +841,7 @@ mod test { assert_success_cached( "/buggy/0.1.0/directory_3/.gitignore", web, - CachePolicy::ForeverInCdnAndStaleInBrowser, + CachePolicy::ForeverInCdnAndBrowser, &env.config(), )?; assert_success_cached( @@ -868,7 +859,7 @@ mod test { assert_success_cached( "/buggy/0.1.0/directory_4/empty_file_no_ext", web, - CachePolicy::ForeverInCdnAndStaleInBrowser, + CachePolicy::ForeverInCdnAndBrowser, &env.config(), )?; Ok(()) From e40a8f023d2458978baba05dc2b4edde62c60725 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 29 Sep 2022 19:52:44 +0200 Subject: [PATCH 12/12] omit cache-control header when it would be empty --- src/test/mod.rs | 17 +++++++++++------ src/web/cache.rs | 5 ++++- src/web/rustdoc.rs | 2 +- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/test/mod.rs b/src/test/mod.rs index 3b1c8e638..0f4db0599 100644 --- a/src/test/mod.rs +++ b/src/test/mod.rs @@ -60,12 +60,17 @@ pub(crate) fn assert_cache_control( config: &Config, ) { assert!(config.cache_control_stale_while_revalidate.is_some()); - assert_eq!( - res.headers() - .get("Cache-Control") - .expect("missing cache-control header"), - &CacheControl(cache_policy.render(config)).to_string() - ); + let cache_control = res.headers().get("Cache-Control"); + + let expected_directives = cache_policy.render(config); + if expected_directives.is_empty() { + assert!(cache_control.is_none()); + } else { + assert_eq!( + cache_control.expect("missing cache-control header"), + &CacheControl(expected_directives).to_string() + ); + } } /// Make sure that a URL returns a status code between 200-299 diff --git a/src/web/cache.rs b/src/web/cache.rs index 28183dc34..c01bfbae0 100644 --- a/src/web/cache.rs +++ b/src/web/cache.rs @@ -104,7 +104,10 @@ impl AfterMiddleware for CacheMiddleware { ); } - res.headers.set(CacheControl(cache.render(config))); + let directives = cache.render(config); + if !directives.is_empty() { + res.headers.set(CacheControl(directives)) + } Ok(res) } } diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 16364cdf2..70396d682 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -1769,7 +1769,7 @@ mod test { let url = format!("http://{}/dummy", web.server_addr()); let resp = client.get(url).send()?; assert_eq!(resp.status(), StatusCode::FOUND); - assert!(resp.headers().get("Cache-Control").unwrap().is_empty()); + assert!(resp.headers().get("Cache-Control").is_none()); assert!(resp .headers() .get("Location")