Skip to content

Commit ee62c0b

Browse files
committed
Add support for unmarshalling unicode strings.
1 parent 84363e9 commit ee62c0b

File tree

1 file changed

+23
-2
lines changed

1 file changed

+23
-2
lines changed

src/marshal.rs

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,15 @@ use std::io;
44
/// Errors that can be reported while unmarshalling an object.
#[derive(Debug)]
55
pub enum UnmarshalError {
66
/// The underlying reader failed; wraps the originating `io::Error`.
Io(io::Error),
7+
/// Bytes read for a string were not valid UTF-8; wraps the `FromUtf8Error`
/// returned by `String::from_utf8`.
Decoding(::std::string::FromUtf8Error),
78
/// Carries a ready-made message that the `Display` impl prints verbatim.
/// NOTE(review): presumably raised for an unrecognized type code; the
/// construction site is not visible here, so confirm against the callers.
UnexpectedCode(String),
89
}
910

1011
impl fmt::Display for UnmarshalError {
1112
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1213
match *self {
1314
UnmarshalError::Io(ref e) => write!(f, "I/O error:").and_then(|_| e.fmt(f)),
15+
UnmarshalError::Decoding(ref e) => write!(f, "Decoding error:").and_then(|_| e.fmt(f)),
1416
UnmarshalError::UnexpectedCode(ref s) => write!(f, "{}", s),
1517
}
1618
}
@@ -37,14 +39,12 @@ pub enum Object {
3739
//List,
3840
//Dict,
3941
//Code,
40-
//Unicode,
4142
//Unknown,
4243
//Set,
4344
//FrozenSet,
4445
//Ref,
4546

4647
Bytes(Vec<u8>), // aka. ASCII in CPython's marshal
47-
//AsciiInterned,
4848
//SmallTuple,
4949
//ShortAscii,
5050
//ShortAsciiInterned
@@ -89,6 +89,19 @@ fn read_ascii_string<R: io::Read>(r: &mut R, size: usize) -> Result<String, Unma
8989
Ok(string)
9090
}
9191

92+
fn read_unicode_string<R: io::Read>(r: &mut R, size: usize) -> Result<String, UnmarshalError> {
93+
let mut buf = Vec::<u8>::new();
94+
buf.resize(size, 0);
95+
match r.read_exact(&mut buf) {
96+
Err(err) => return Err(UnmarshalError::Io(err)),
97+
Ok(()) => ()
98+
};
99+
match String::from_utf8(buf) {
100+
Err(err) => return Err(UnmarshalError::Decoding(err)),
101+
Ok(s) => Ok(s)
102+
}
103+
}
104+
92105
pub fn read_object<R: io::Read>(r: &mut R) -> Result<Object, UnmarshalError> {
93106
let byte = read_byte!(r);
94107
let _flag = byte & 0b10000000; // TODO: do something with this
@@ -103,6 +116,11 @@ pub fn read_object<R: io::Read>(r: &mut R) -> Result<Object, UnmarshalError> {
103116
let size = read_byte!(r) as usize;
104117
Object::String(try!(read_ascii_string(r, size)))
105118
},
119+
'u' => { // “unicode”
120+
let size = try!(read_long(r)) as usize; // TODO: overflow check if usize is smaller than u32
121+
Object::String(try!(read_unicode_string(r, size)))
122+
}
123+
106124
_ => panic!(format!("Unsupported opcode: {}", opcode as char)),
107125
};
108126
Ok(object)
@@ -139,4 +157,7 @@ fn test_string() {
139157

140158
let mut reader: &[u8] = b"\xda\x04foo\xe9"; // Note: this string was not generated with the marshal module
141159
assert_eq!(Object::String("fooé".to_string()), read_object(&mut reader).unwrap());
160+
161+
let mut reader: &[u8] = b"\xf5\x05\x00\x00\x00foo\xc3\xa9";
162+
assert_eq!(Object::String("fooé".to_string()), read_object(&mut reader).unwrap());
142163
}

0 commit comments

Comments
 (0)