percent_encoding_lite/
lib.rs

1//! URL encoding/decoding functions
2//!
3//! Check [`encode`] and [`decode`] docs for example usage
4
/// Set of allowed bytes, stored as one bit per byte value in `32..=127`
///
/// Byte values outside that range can never be members: [`Bitmask::add`] and
/// [`Bitmask::remove`] ignore them and [`Bitmask::contains`] reports `false`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Bitmask(pub [u32; 3]);

impl Bitmask {
    /// Mask with no members
    pub const EMPTY: Bitmask = Bitmask([0, 0, 0]);
    /// ASCII letters, ASCII digits and `-_.!~*'()`
    pub const URI_COMPONENT: Bitmask = Bitmask::EMPTY
        .add_all(b"ABCDEFGHIJKLMNOPQRSTUVWXYZ")
        .add_all(b"abcdefghijklmnopqrstuvwxyz")
        .add_all(b"0123456789")
        .add_all(b"-_.!~*'()");
    /// Everything in [`Bitmask::URI_COMPONENT`] plus `;/?:@&=+$,#`
    pub const URI: Bitmask = Bitmask::URI_COMPONENT.add_all(b";/?:@&=+$,#");
    /// Everything in [`Bitmask::URI`] plus `[]`, minus `!'()*`
    pub const RFC3986: Bitmask = Bitmask::URI.add_all(b"[]").remove_all(b"!'()*");
    /// Everything in [`Bitmask::URI_COMPONENT`] plus `/`
    pub const PATH: Bitmask = Bitmask::URI_COMPONENT.add(b'/');

    /// Whether `ch` lies inside the range this mask is able to represent
    const fn is_representable(ch: u8) -> bool {
        matches!(ch, 32..=127)
    }

    /// Index of the word holding the bit for `ch`; `ch` must be at least 32
    const fn word_of(ch: u8) -> usize {
        (ch as usize - 32) / 32
    }

    /// Single-bit pattern selecting `ch` inside its word
    const fn bit_of(ch: u8) -> u32 {
        1_u32 << (ch % 32)
    }

    /// Checks if this bitmask contains `ch`
    pub const fn contains(&self, ch: u8) -> bool {
        // The range test runs first so `word_of` never sees a value below 32.
        Self::is_representable(ch) && (self.0[Self::word_of(ch)] & Self::bit_of(ch)) != 0
    }

    /// Returns a copy of this bitmask with `ch` added
    ///
    /// Bytes outside `32..=127` leave the mask unchanged.
    pub const fn add(self, ch: u8) -> Bitmask {
        if !Self::is_representable(ch) {
            return self;
        }
        let mut words = self.0;
        words[Self::word_of(ch)] |= Self::bit_of(ch);
        Bitmask(words)
    }

    /// Returns a copy of this bitmask with every byte of `chars` added
    pub const fn add_all(self, chars: &[u8]) -> Bitmask {
        // `for` loops are not available in `const fn`, so peel the slice by hand.
        let mut mask = self;
        let mut pending = chars;
        while let [first, rest @ ..] = pending {
            mask = mask.add(*first);
            pending = rest;
        }
        mask
    }

    /// Returns a copy of this bitmask with `ch` removed
    ///
    /// Bytes outside `32..=127` leave the mask unchanged.
    pub const fn remove(self, ch: u8) -> Bitmask {
        if !Self::is_representable(ch) {
            return self;
        }
        let mut words = self.0;
        words[Self::word_of(ch)] &= !Self::bit_of(ch);
        Bitmask(words)
    }

    /// Returns a copy of this bitmask with every byte of `chars` removed
    pub const fn remove_all(self, chars: &[u8]) -> Bitmask {
        let mut mask = self;
        let mut pending = chars;
        while let [first, rest @ ..] = pending {
            mask = mask.remove(*first);
            pending = rest;
        }
        mask
    }
}
62
63/// Encodes given slice using provided [`Bitmask`]
64/// # Example
65/// ```
66/// # use percent_encoding_lite::Bitmask;
67/// let string = "Dragonborn, dragonborn, by his honor is sworn";
68/// let encoded = percent_encoding_lite::encode(string, Bitmask::URI);
69/// assert_eq!(&encoded, "Dragonborn,+dragonborn,+by+his+honor+is+sworn");
70/// ```
71pub fn encode(src: impl AsRef<[u8]>, mask: Bitmask) -> String {
72    let src = src.as_ref();
73    let mut out = String::with_capacity(src.len());
74    for &ch in src.iter() {
75        if ch == b' ' {
76            out.push('+');
77        } else if mask.contains(ch) {
78            out.push(ch as char);
79        } else {
80            const HEX: &[u8] = b"0123456789ABCDEF";
81            out.push('%');
82            out.push(HEX[ch as usize >> 4] as char);
83            out.push(HEX[ch as usize & 0xF] as char);
84        }
85    }
86    out
87}
88
/// Decodes a percent encoded string
///
/// `+` becomes a space and `%XX` (two hexadecimal digits, upper or lower case)
/// becomes the byte with that value. A `%` that is not followed by two
/// hexadecimal digits is copied to the output unchanged and decoding resumes
/// with the byte right after it, so malformed input never loses data.
///
/// The result is returned as raw bytes because decoded data is not guaranteed
/// to be valid UTF-8.
/// # Example
/// ```
/// let encoded = "%54%6F%20%6B%65%65%70%20%65%76%69%6C%20%66%6F%72%65%76%65%72%20%61%74%20%62%61%79%21";
/// let decoded = percent_encoding_lite::decode(encoded);
/// assert_eq!(&decoded, b"To keep evil forever at bay!");
/// ```
pub fn decode(src: impl AsRef<[u8]>) -> Vec<u8> {
    let src = src.as_ref();
    // Decoding never produces more bytes than it consumes.
    let mut out = Vec::with_capacity(src.len());
    let mut rest = src;
    while let [byte, tail @ ..] = rest {
        rest = tail;
        match (*byte, tail) {
            (b'+', _) => out.push(b' '),
            (b'%', [hi, lo, after @ ..]) => {
                let digits = char::from(*hi)
                    .to_digit(16)
                    .zip(char::from(*lo).to_digit(16));
                match digits {
                    Some((hi, lo)) => {
                        // Both digits are below 16, so the value always fits in a byte.
                        out.push(((hi << 4) | lo) as u8);
                        rest = after;
                    }
                    // Not a valid escape: keep the `%` and let the following
                    // bytes be processed normally instead of dropping one.
                    None => out.push(b'%'),
                }
            }
            // Ordinary bytes, and a `%` with fewer than two bytes after it.
            (other, _) => out.push(other),
        }
    }
    out
}
116
117/// Checks if this string contains any unencoded characters
118/// # Example
119/// ```
120/// # use percent_encoding_lite::{is_encoded, Bitmask};
121/// let string = "Dovahkiin, Dovahkiin, naal ok zin los vahriin, wah dein vokul mahfaeraak ahst vaal!";
122/// // contains comma = false
123/// assert!(!is_encoded(&string, Bitmask::URI_COMPONENT));
124/// ```
125pub fn is_encoded(src: impl AsRef<[u8]>, mask: Bitmask) -> bool {
126    let mask = mask.add(b'%');
127    for &ch in src.as_ref() {
128        if !mask.contains(ch) { return false; }
129    }
130    true
131}
132
#[cfg(test)]
mod test {
    use super::*;

    /// `+` decodes to a space and `%XX` escapes decode to raw UTF-8 bytes
    #[test]
    fn urldecode_test() {
        let cases = [
            (
                "Anno+1404.Gold+Edition.v+2.1.5010.0.(%D0%9D%D0%BE%D0%B2%D1%8B%D0%B9+%D0%94%D0%B8%D1%81%D0%BA).(2010).Repack",
                "Anno 1404.Gold Edition.v 2.1.5010.0.(Новый Диск).(2010).Repack",
            ),
            (
                "The+Elder+Scrolls+V.+Skyrim.+Anniversary+Edition+v.1.6.640.0.8+(2011-2021)",
                "The Elder Scrolls V. Skyrim. Anniversary Edition v.1.6.640.0.8 (2011-2021)",
            ),
        ];
        for &(encoded, correct) in cases.iter() {
            let decoded = String::from_utf8(decode(encoded)).unwrap();
            assert_eq!(&decoded, correct);
        }
    }

    /// Bytes outside the mask are escaped; input made only of mask members is untouched
    #[test]
    fn urlencode_test() {
        let cases = [
            (
                "Microsoft Windows 10, version 22H2, build 19045.2846 (updated April 2023) - Оригинальные образы от Microsoft MSDN [Ru]",
                "Microsoft+Windows+10%2C+version+22H2%2C+build+19045.2846+(updated+April+2023)+-+%D0%9E%D1%80%D0%B8%D0%B3%D0%B8%D0%BD%D0%B0%D0%BB%D1%8C%D0%BD%D1%8B%D0%B5+%D0%BE%D0%B1%D1%80%D0%B0%D0%B7%D1%8B+%D0%BE%D1%82+Microsoft+MSDN+%5BRu%5D",
            ),
            (
                "Windows_Embedded_8.1_Industry_Pro_with_Update_x86_x64_MultiLang",
                "Windows_Embedded_8.1_Industry_Pro_with_Update_x86_x64_MultiLang",
            ),
        ];
        for &(orig, correct) in cases.iter() {
            let encoded = encode(orig, Bitmask::URI_COMPONENT);
            assert_eq!(&encoded, correct);
        }
    }

    #[test]
    fn is_encoded_test() {
        let cases = [
            // Comma is not part of URI_COMPONENT - not encoded
            (",", Bitmask::URI_COMPONENT, false),
            // Regular text - allowed
            ("abc", Bitmask::URI_COMPONENT, true),
            // Square bracket not in URI - not encoded
            ("abc[def", Bitmask::URI, false),
            // Comma in URI - allowed
            ("abc,def", Bitmask::URI, true),
            // Percent sign means it is encoded
            ("%01%02", Bitmask::URI, true),
        ];
        for &(input, mask, expected) in cases.iter() {
            assert_eq!(is_encoded(input, mask), expected);
        }
    }
}