Skip to content

Commit eb3e3b1

Browse files
committed
Integrate percent encoding into Url type
Add `append_query_param` method to `Url` that handles percent encoding internally, rather than having external code use the encoding functions directly. This encapsulates the encoding logic within the `Url` type. This ensures that existing percent-encoded query parameters in URLs are not double-encoded when new parameters are added, fixing issue #468. Co-Authored-By: HAL 9000 Signed-off-by: Elias Rohrer <dev@tnull.de>
1 parent c99d016 commit eb3e3b1

File tree

4 files changed

+204
-67
lines changed

4 files changed

+204
-67
lines changed

bitreq/src/http_url.rs

Lines changed: 0 additions & 36 deletions
This file was deleted.

bitreq/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,6 @@ mod client;
254254
#[cfg(feature = "std")]
255255
mod connection;
256256
mod error;
257-
mod http_url;
258257
#[cfg(feature = "proxy")]
259258
mod proxy;
260259
mod request;

bitreq/src/request.rs

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ use std::time::Instant;
1212
use crate::connection::AsyncConnection;
1313
#[cfg(feature = "std")]
1414
use crate::connection::Connection;
15-
use crate::http_url::percent_encode_string;
1615
#[cfg(feature = "proxy")]
1716
use crate::proxy::Proxy;
1817
#[cfg(feature = "std")]
@@ -87,7 +86,7 @@ impl fmt::Display for Method {
8786
pub struct Request {
8887
pub(crate) method: Method,
8988
url: URL,
90-
params: String,
89+
params: Vec<(String, String)>,
9190
headers: BTreeMap<String, String>,
9291
body: Option<Vec<u8>>,
9392
timeout: Option<u64>,
@@ -112,7 +111,7 @@ impl Request {
112111
Request {
113112
method,
114113
url: url.into(),
115-
params: String::new(),
114+
params: Vec::new(),
116115
headers: BTreeMap::new(),
117116
body: None,
118117
timeout: None,
@@ -162,19 +161,9 @@ impl Request {
162161
/// Adds given key and value as query parameter to request url
163162
/// (resource).
164163
///
165-
/// The key and value are both encoded.
164+
/// The key and value are percent-encoded when the request is sent.
166165
pub fn with_param<T: Into<String>, U: Into<String>>(mut self, key: T, value: U) -> Request {
167-
let key = key.into();
168-
let key = percent_encode_string(&key);
169-
let value = value.into();
170-
let value = percent_encode_string(&value);
171-
172-
if !self.params.is_empty() {
173-
self.params.push('&');
174-
}
175-
self.params.push_str(&key);
176-
self.params.push('=');
177-
self.params.push_str(&value);
166+
self.params.push((key.into(), value.into()));
178167
self
179168
}
180169

@@ -378,8 +367,8 @@ impl ParsedRequest {
378367
Error::IoError(std::io::Error::new(std::io::ErrorKind::InvalidInput, e.to_string()))
379368
})?;
380369

381-
if !config.params.is_empty() {
382-
url.append_query_params(&config.params);
370+
for (key, value) in &config.params {
371+
url.append_query_param(key, value);
383372
}
384373

385374
#[cfg(all(feature = "proxy", feature = "std"))]

bitreq/src/url.rs

Lines changed: 198 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -381,29 +381,30 @@ impl Url {
381381
}
382382
}
383383

384-
/// Appends query parameters to the URL.
384+
/// Appends a single query parameter to the URL.
385385
///
386-
/// If the URL already has a query string, the parameters are appended with `&`.
387-
/// Otherwise, they are appended with `?`.
388-
pub(crate) fn append_query_params(&mut self, params: &str) {
389-
if params.is_empty() {
390-
return;
391-
}
386+
/// The key and value are percent-encoded before being appended.
387+
/// If the URL already has a query string, the parameter is appended with `&`.
388+
/// Otherwise, it is appended with `?`.
389+
pub(crate) fn append_query_param(&mut self, key: &str, value: &str) {
390+
let encoded_key = percent_encode_string(key);
391+
let encoded_value = percent_encode_string(value);
392+
let param = format!("{}={}", encoded_key, encoded_value);
392393

393394
let separator = if self.query.is_some() { "&" } else { "?" };
394395

395396
// Build the new serialization string
396397
let new_serialization = if let Some(frag) = self.fragment() {
397-
// Insert params before fragment
398+
// Insert param before fragment
398399
let frag_start = self.fragment.as_ref().unwrap().start - 1; // -1 for '#'
399-
format!("{}{}{}#{}", &self.serialization[..frag_start], separator, params, frag)
400+
format!("{}{}{}#{}", &self.serialization[..frag_start], separator, param, frag)
400401
} else {
401-
format!("{}{}{}", &self.serialization, separator, params)
402+
format!("{}{}{}", &self.serialization, separator, param)
402403
};
403404

404405
// Reparse to update all fields
405406
*self =
406-
Self::parse_inner(new_serialization).expect("append_query_params produced invalid URL");
407+
Self::parse_inner(new_serialization).expect("append_query_param produced invalid URL");
407408
}
408409

409410
/// If this URL has no fragment but `other` does, copies the fragment from `other`.
@@ -426,8 +427,8 @@ impl Url {
426427
#[cfg(feature = "std")]
427428
pub(crate) fn write_base_url_to<W: std::fmt::Write>(&self, dst: &mut W) -> std::fmt::Result {
428429
write!(dst, "{}://{}", self.scheme(), self.base_url())?;
429-
if let Some(port) = self.port() {
430-
write!(dst, ":{}", port)?;
430+
if self.has_explicit_non_default_port() {
431+
write!(dst, ":{}", self.port())?;
431432
}
432433
Ok(())
433434
}
@@ -457,6 +458,41 @@ impl std::fmt::Display for Url {
457458
}
458459
}
459460

461+
/// Returns the `%HH` triplet representing `byte` for percent encoding.
462+
fn percent_encoded_triplet(byte: u8) -> [char; 3] {
463+
const HEX: &[u8; 16] = b"0123456789ABCDEF";
464+
['%', HEX[(byte >> 4) as usize] as char, HEX[(byte & 0x0F) as usize] as char]
465+
}
466+
467+
/// Percent-encodes a char and appends it to `result`.
468+
/// Unreserved characters (0-9, A-Z, a-z, -, ., _, ~) are not encoded.
469+
fn percent_encode_char(c: char, result: &mut String) {
470+
match c {
471+
// All URL-'safe' characters are not encoded
472+
'0'..='9' | 'A'..='Z' | 'a'..='z' | '-' | '.' | '_' | '~' => {
473+
result.push(c);
474+
}
475+
_ => {
476+
// Any UTF-8 character can fit in 4 bytes
477+
let mut utf8_buf = [0u8; 4];
478+
c.encode_utf8(&mut utf8_buf).as_bytes().iter().for_each(|byte| {
479+
for ch in percent_encoded_triplet(*byte) {
480+
result.push(ch);
481+
}
482+
});
483+
}
484+
}
485+
}
486+
487+
/// Percent-encodes the entire input string and returns the encoded version.
488+
fn percent_encode_string(input: &str) -> String {
489+
let mut encoded = String::with_capacity(input.len());
490+
for ch in input.chars() {
491+
percent_encode_char(ch, &mut encoded);
492+
}
493+
encoded
494+
}
495+
460496
#[cfg(test)]
461497
mod tests {
462498
use super::*;
@@ -765,4 +801,153 @@ mod tests {
765801
fn assert_error<E: std::error::Error>(_: &E) {}
766802
assert_error(&ParseError::EmptyInput);
767803
}
804+
805+
#[test]
806+
fn percent_encode_unreserved_chars_unchanged() {
807+
// RFC 3986 unreserved characters should not be encoded
808+
assert_eq!(percent_encode_string("abc"), "abc");
809+
assert_eq!(percent_encode_string("ABC"), "ABC");
810+
assert_eq!(percent_encode_string("0123456789"), "0123456789");
811+
assert_eq!(percent_encode_string("-._~"), "-._~");
812+
}
813+
814+
#[test]
815+
fn percent_encode_reserved_chars() {
816+
// Reserved characters should be encoded
817+
assert_eq!(percent_encode_string(" "), "%20");
818+
assert_eq!(percent_encode_string("!"), "%21");
819+
assert_eq!(percent_encode_string("#"), "%23");
820+
assert_eq!(percent_encode_string("$"), "%24");
821+
assert_eq!(percent_encode_string("&"), "%26");
822+
assert_eq!(percent_encode_string("'"), "%27");
823+
assert_eq!(percent_encode_string("("), "%28");
824+
assert_eq!(percent_encode_string(")"), "%29");
825+
assert_eq!(percent_encode_string("*"), "%2A");
826+
assert_eq!(percent_encode_string("+"), "%2B");
827+
assert_eq!(percent_encode_string(","), "%2C");
828+
assert_eq!(percent_encode_string("/"), "%2F");
829+
assert_eq!(percent_encode_string(":"), "%3A");
830+
assert_eq!(percent_encode_string(";"), "%3B");
831+
assert_eq!(percent_encode_string("="), "%3D");
832+
assert_eq!(percent_encode_string("?"), "%3F");
833+
assert_eq!(percent_encode_string("@"), "%40");
834+
assert_eq!(percent_encode_string("["), "%5B");
835+
assert_eq!(percent_encode_string("]"), "%5D");
836+
}
837+
838+
#[test]
839+
fn percent_encode_unicode() {
840+
// Unicode characters should be encoded as UTF-8 bytes
841+
assert_eq!(percent_encode_string("ó"), "%C3%B3");
842+
assert_eq!(percent_encode_string("ò"), "%C3%B2");
843+
assert_eq!(percent_encode_string("👀"), "%F0%9F%91%80");
844+
assert_eq!(percent_encode_string("日本語"), "%E6%97%A5%E6%9C%AC%E8%AA%9E");
845+
}
846+
847+
#[test]
848+
fn percent_encode_mixed_string() {
849+
assert_eq!(percent_encode_string("hello world"), "hello%20world");
850+
assert_eq!(percent_encode_string("foo=bar"), "foo%3Dbar");
851+
assert_eq!(percent_encode_string("what's this? 👀"), "what%27s%20this%3F%20%F0%9F%91%80");
852+
}
853+
854+
#[test]
855+
fn percent_encode_percent_sign() {
856+
// The percent sign itself must be encoded
857+
assert_eq!(percent_encode_string("%"), "%25");
858+
assert_eq!(percent_encode_string("%7B"), "%257B");
859+
}
860+
861+
#[test]
862+
fn append_query_param_to_url_without_query() {
863+
let mut url = Url::parse("http://example.com/path").unwrap();
864+
url.append_query_param("foo", "bar");
865+
assert_eq!(url.query(), Some("foo=bar"));
866+
assert_eq!(url.as_str(), "http://example.com/path?foo=bar");
867+
}
868+
869+
#[test]
870+
fn append_query_param_to_url_with_existing_query() {
871+
let mut url = Url::parse("http://example.com/path?existing=value").unwrap();
872+
url.append_query_param("foo", "bar");
873+
assert_eq!(url.query(), Some("existing=value&foo=bar"));
874+
assert_eq!(url.as_str(), "http://example.com/path?existing=value&foo=bar");
875+
}
876+
877+
#[test]
878+
fn append_query_param_encodes_special_chars() {
879+
let mut url = Url::parse("http://example.com").unwrap();
880+
url.append_query_param("key with spaces", "value&special=chars");
881+
assert_eq!(url.query(), Some("key%20with%20spaces=value%26special%3Dchars"));
882+
}
883+
884+
#[test]
885+
fn append_query_param_encodes_unicode() {
886+
let mut url = Url::parse("http://example.com").unwrap();
887+
url.append_query_param("ówò", "what's this? 👀");
888+
assert_eq!(url.query(), Some("%C3%B3w%C3%B2=what%27s%20this%3F%20%F0%9F%91%80"));
889+
}
890+
891+
#[test]
892+
fn append_query_param_preserves_fragment() {
893+
let mut url = Url::parse("http://example.com/path#section").unwrap();
894+
url.append_query_param("foo", "bar");
895+
assert_eq!(url.query(), Some("foo=bar"));
896+
assert_eq!(url.fragment(), Some("section"));
897+
assert_eq!(url.as_str(), "http://example.com/path?foo=bar#section");
898+
}
899+
900+
#[test]
901+
fn append_query_param_to_url_with_query_and_fragment() {
902+
let mut url = Url::parse("http://example.com/path?existing=value#section").unwrap();
903+
url.append_query_param("foo", "bar");
904+
assert_eq!(url.query(), Some("existing=value&foo=bar"));
905+
assert_eq!(url.fragment(), Some("section"));
906+
assert_eq!(url.as_str(), "http://example.com/path?existing=value&foo=bar#section");
907+
}
908+
909+
#[test]
910+
fn append_query_param_multiple_params() {
911+
let mut url = Url::parse("http://example.com").unwrap();
912+
url.append_query_param("a", "1");
913+
url.append_query_param("b", "2");
914+
url.append_query_param("c", "3");
915+
assert_eq!(url.query(), Some("a=1&b=2&c=3"));
916+
}
917+
918+
#[test]
919+
fn no_double_encoding_existing_query_params() {
920+
// When a URL already has percent-encoded query params,
921+
// they should NOT be re-encoded when new params are appended.
922+
// This is the fix for issue #468.
923+
let mut url = Url::parse("http://example.com/test?query=%7B%22id%22%7D").unwrap();
924+
925+
// Verify the existing encoded query is preserved as-is
926+
assert_eq!(url.query(), Some("query=%7B%22id%22%7D"));
927+
928+
// Add a new param
929+
url.append_query_param("foo", "bar");
930+
931+
// The existing encoded query should still be preserved, not double-encoded
932+
// i.e., %7B should NOT become %257B
933+
assert_eq!(url.query(), Some("query=%7B%22id%22%7D&foo=bar"));
934+
assert_eq!(url.as_str(), "http://example.com/test?query=%7B%22id%22%7D&foo=bar");
935+
}
936+
937+
#[test]
938+
fn no_double_encoding_complex_encoded_url() {
939+
// Test with a more complex encoded URL
940+
let mut url =
941+
Url::parse("http://example.com/api?filter=%7B%22name%22%3A%22test%22%7D").unwrap();
942+
943+
// Original query should be preserved
944+
assert_eq!(url.query(), Some("filter=%7B%22name%22%3A%22test%22%7D"));
945+
946+
// Add multiple new params
947+
url.append_query_param("page", "1");
948+
url.append_query_param("sort", "name");
949+
950+
// Verify no double encoding occurred
951+
assert_eq!(url.query(), Some("filter=%7B%22name%22%3A%22test%22%7D&page=1&sort=name"));
952+
}
768953
}

0 commit comments

Comments
 (0)