Skip to content

Commit 84363e9

Browse files
committed
Add support for unmarshalling ascii strings.
1 parent 5974a42 commit 84363e9

File tree

1 file changed

+38
-9
lines changed

1 file changed

+38
-9
lines changed

src/marshal.rs

Lines changed: 38 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ pub enum Object {
3030
//BinaryFloat,
3131
//Complex,
3232
//BinaryComplex,
33-
//String,
33+
String(::std::string::String),
3434
//Interned,
3535
//Ref_,
3636
//Tuple,
@@ -43,7 +43,7 @@ pub enum Object {
4343
//FrozenSet,
4444
//Ref,
4545

46-
//Ascii,
46+
Bytes(Vec<u8>), // aka. ASCII in CPython's marshal
4747
//AsciiInterned,
4848
//SmallTuple,
4949
//ShortAscii,
@@ -66,6 +66,29 @@ macro_rules! read_byte {
6666
}};
6767
}
6868

69+
fn read_long<R: io::Read>(r: &mut R) -> Result<u32, UnmarshalError> {
70+
let mut buf = [0, 0, 0, 0];
71+
match r.read_exact(&mut buf) {
72+
Err(err) => return Err(UnmarshalError::Io(err)),
73+
Ok(()) => Ok(buf[0] as u32 + 256*(buf[1] as u32 + 256*(buf[2] as u32 + 256*(buf[3] as u32))))
74+
}
75+
}
76+
77+
fn read_ascii_string<R: io::Read>(r: &mut R, size: usize) -> Result<String, UnmarshalError> {
78+
let mut buf = Vec::<u8>::new();
79+
buf.resize(size, 0);
80+
match r.read_exact(&mut buf) {
81+
Err(err) => return Err(UnmarshalError::Io(err)),
82+
Ok(()) => ()
83+
};
84+
let mut string = String::new();
85+
string.reserve(buf.len()); // The string will use more bytes than this if there are extended ascii characters, but it can't hurt to reserve anyway
86+
for c in buf {
87+
string.push(c as char);
88+
}
89+
Ok(string)
90+
}
91+
6992
pub fn read_object<R: io::Read>(r: &mut R) -> Result<Object, UnmarshalError> {
7093
let byte = read_byte!(r);
7194
let _flag = byte & 0b10000000; // TODO: do something with this
@@ -75,13 +98,10 @@ pub fn read_object<R: io::Read>(r: &mut R) -> Result<Object, UnmarshalError> {
7598
'N' => Object::None,
7699
'F' => Object::False,
77100
'T' => Object::True,
78-
'i' => {
79-
let mut buf = [0, 0, 0, 0];
80-
match r.read_exact(&mut buf) {
81-
Err(err) => return Err(UnmarshalError::Io(err)),
82-
Ok(()) => ()
83-
};
84-
Object::Int(buf[0] as u32 + 256*(buf[1] as u32 + 256*(buf[2] as u32 + 256*(buf[3] as u32))))
101+
'i' => Object::Int(try!(read_long(r))),
102+
'z' | 'Z' => { // “short ascii”, “short ascii interned”
103+
let size = read_byte!(r) as usize;
104+
Object::String(try!(read_ascii_string(r, size)))
85105
},
86106
_ => panic!(format!("Unsupported opcode: {}", opcode as char)),
87107
};
@@ -111,3 +131,12 @@ fn test_int() {
111131
let mut reader: &[u8] = b"\xe9\xe8\x03\x00\x00";
112132
assert_eq!(Object::Int(1000), read_object(&mut reader).unwrap());
113133
}
134+
135+
#[test]
fn test_string() {
    // One opcode byte, a one-byte length (3), then the payload bytes.
    let expected_plain = Object::String("foo".to_owned());
    let mut plain_input: &[u8] = b"\xda\x03foo";
    let decoded_plain = read_object(&mut plain_input).unwrap();
    assert_eq!(expected_plain, decoded_plain);

    // Hand-written input (not produced by the marshal module): the payload
    // ends with the non-ASCII byte 0xe9, which is expected to decode to 'é'.
    let expected_extended = Object::String("fooé".to_owned());
    let mut extended_input: &[u8] = b"\xda\x04foo\xe9";
    let decoded_extended = read_object(&mut extended_input).unwrap();
    assert_eq!(expected_extended, decoded_extended);
}

0 commit comments

Comments
 (0)