From c2bfa35bd98538910a09770767bf394229e106e6 Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Fri, 2 May 2025 01:48:00 +0200 Subject: [PATCH] Allow urlencoded data URLs This automatically selects urlencoded data URLs if that results in smaller output than base64 encoding them. --- src/url.rs | 43 +++++++++++++++++++++++++-------- tests/cli/basic.rs | 6 ++--- tests/css/embed_css.rs | 4 +-- tests/session/retrieve_asset.rs | 4 +-- 4 files changed, 40 insertions(+), 17 deletions(-) diff --git a/src/url.rs b/src/url.rs index 52aa1014..799e8e15 100644 --- a/src/url.rs +++ b/src/url.rs @@ -1,11 +1,35 @@ use base64::{prelude::BASE64_STANDARD, Engine}; -use percent_encoding::percent_decode_str; +use percent_encoding::{percent_decode_str, percent_encode, AsciiSet, CONTROLS}; pub use url::Url; use crate::core::{detect_media_type, parse_content_type}; pub const EMPTY_IMAGE_DATA_URL: &str = "data:image/png,\ %89PNG%0D%0A%1A%0A%00%00%00%0DIHDR%00%00%00%0D%00%00%00%0D%08%04%00%00%00%D8%E2%2C%F7%00%00%00%11IDATx%DAcd%C0%09%18G%A5%28%96%02%00%0A%F8%00%0E%CB%8A%EB%16%00%00%00%00IEND%AEB%60%82"; +// https://datatracker.ietf.org/doc/html/rfc3986#section-2.2 +const DATA_ESC: &AsciiSet = &CONTROLS + .add(b' ') + .add(b':') + .add(b'/') + .add(b'?') + .add(b'#') + .add(b'[') + .add(b']') + .add(b'@') + .add(b'!') + .add(b'$') + .add(b'&') + .add(b'\'') + .add(b'(') + .add(b')') + .add(b'*') + .add(b'+') + .add(b',') + .add(b';') + .add(b'=') + // make nesting and HTML embedding safe + .add(b'"') + .add(b'%'); pub fn clean_url(url: Url) -> Url { let mut url = url.clone(); @@ -33,15 +57,14 @@ pub fn create_data_url(media_type: &str, charset: &str, data: &[u8], final_asset "".to_string() }; - data_url.set_path( - format!( - "{}{};base64,{}", - media_type, - c, - BASE64_STANDARD.encode(data) - ) - .as_str(), - ); + let base64 = BASE64_STANDARD.encode(data); + let urlenc = percent_encode(data, DATA_ESC).to_string(); + + if urlenc.len() < base64.len() { 
data_url.set_path(format!("{}{},{}", media_type, c, urlenc).as_str()); + } else { + data_url.set_path(format!("{}{};base64,{}", media_type, c, base64).as_str()); + } data_url } diff --git a/tests/cli/basic.rs b/tests/cli/basic.rs index fbf94f13..94554c02 100644 --- a/tests/cli/basic.rs +++ b/tests/cli/basic.rs @@ -109,11 +109,11 @@ mod passing { @charset "UTF-8"; - @import "data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K"; + @import "data:text/css,body{background-color%3A%23000%3Bcolor%3A%23fff}%0A"; - @import url("data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K"); + @import url("data:text/css,body{background-color%3A%23000%3Bcolor%3A%23fff}%0A"); - @import url("data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K"); + @import url("data:text/css,body{background-color%3A%23000%3Bcolor%3A%23fff}%0A"); diff --git a/tests/css/embed_css.rs b/tests/css/embed_css.rs index 834b153b..f25e88f5 100644 --- a/tests/css/embed_css.rs +++ b/tests/css/embed_css.rs @@ -175,9 +175,9 @@ mod passing { "\ @charset \"UTF-8\";\n\ \n\ - @import \"data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9\";\n\ + @import \"data:text/css,html{background-color%3A%23000}\";\n\ \n\ - @import url(\"data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==\")\n\ + @import url(\"data:text/css,html{color%3A%23fff}\")\n\ " ); } diff --git a/tests/session/retrieve_asset.rs b/tests/session/retrieve_asset.rs index 0fcf7482..9a3f383f 100644 --- a/tests/session/retrieve_asset.rs +++ b/tests/session/retrieve_asset.rs @@ -33,7 +33,7 @@ mod passing { assert_eq!(&charset, "US-ASCII"); assert_eq!( url::create_data_url(&media_type, &charset, &data, &final_url), - Url::parse("data:text/html;base64,dGFyZ2V0").unwrap(), + Url::parse("data:text/html,target").unwrap(), ); assert_eq!( final_url, @@ -70,7 +70,7 @@ mod passing { .unwrap(); assert_eq!(&media_type, "text/javascript"); assert_eq!(&charset, ""); - let data_url = 
"data:text/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg=="; + let data_url = "data:text/javascript,document.body.style.backgroundColor%20%3D%20%22green%22%3B%0Adocument.body.style.color%20%3D%20%22red%22%3B%0A"; assert_eq!( url::create_data_url(&media_type, &charset, &data, &final_url), Url::parse(data_url).unwrap()