From abb9713fce4e0d00fc8a3367deb4da71f75c0a25 Mon Sep 17 00:00:00 2001 From: aprzn Date: Sat, 18 May 2024 10:59:23 -0400 Subject: [PATCH] text decoding, take 3! (simplecursor edition) --- zing/src/encoding/text.rs | 249 +++++++++++++++++++++++++++++---- zing/src/utils/simplecursor.rs | 16 ++- 2 files changed, 232 insertions(+), 33 deletions(-) diff --git a/zing/src/encoding/text.rs b/zing/src/encoding/text.rs index 6cf45ce..1196e67 100644 --- a/zing/src/encoding/text.rs +++ b/zing/src/encoding/text.rs @@ -1,41 +1,38 @@ +use crate::utils::simplecursor::SimpleCursor; + /// 5 bits -#[derive(Clone, Copy, PartialEq)] +#[derive(Clone, Copy, PartialEq, Debug)] struct ZChar(u8); /// technically 10 bits, but top two unused so they are dropped -#[derive(PartialEq, Clone, Copy)] +#[derive(PartialEq, Clone, Copy, Debug)] pub struct ZsciiChar(u8); pub type ZsciiString = Vec; +fn zscii_from_bytes(bytes: &[u8]) -> ZsciiString { + bytes.into_iter().cloned().map(ZsciiChar).collect() +} + /// Returns: /// - a result that wraps a ZsciiString, erroring if the slice terminates before the string ends /// - a usize indicating how many bytes were consumed pub fn decode_zchars( - zchars: &[u8], - alphabet_table_addr: usize, - abbreviations_table_addr: usize, + zchars: &mut SimpleCursor, + alphabet_table_addr: usize, + abbreviations_table: usize, memory: &[u8], -) -> Option<(ZsciiString, usize)> { - fn cut_string(zchars: &[u8]) -> Option> { - let mut out = Vec::new(); - for word in zchars.chunks_exact(2).map(|c| u16::from_be_bytes([c[0], c[1]])) { - out.push(word); - if 0x8000 & word != 0 { - return Some(out); - } - } - None - } +) -> Option { fn get_from_alphabet(alphabet_number: usize, ZChar(codepoint): ZChar, alphabet_table: Option<&[u8]>) -> ZsciiChar { ZsciiChar( alphabet_table // ~'s indicate invalid characters (reserved values A2:6 and A2:7) - .unwrap_or_else(|| br#"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ~~0123456789.,!?_#'"/\-:()"#) + .unwrap_or(br#"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ~~0123456789.,!?_#'"/\-:()"#) [26 * alphabet_number + codepoint as usize - 6] ) } + struct ZsciiSequence<'a, T> where T: Iterator { zchars: std::iter::Peekable, @@ -60,7 +57,7 @@ pub fn decode_zchars( ZChar(0) => Some(ZsciiChar(32)), ZChar(1..=3) => { self.subseq = Some(Box::new(ZsciiSequence { - zchars: todo!(), + zchars: todo!("abbreviations"), subseq: None, alphabet_number: self.alphabet_number, alphabet_table: self.alphabet_table @@ -89,25 +86,217 @@ pub fn decode_zchars( } } + struct ZWordIter<'a, 'b> { + cursor: &'a mut SimpleCursor<'b>, + should_continue: bool, + } + + impl<'a, 'b> ZWordIter<'a, 'b> { + fn new(cursor: &'a mut SimpleCursor<'b>) -> Option { + Some(Self { cursor, should_continue: true }) + } + } + + impl<'a, 'b> Iterator for ZWordIter<'a, 'b> { + type Item = u16; + + fn next(&mut self) -> Option { + if self.should_continue { + let next_word = u16::from_be_bytes(*self.cursor.read_const()?); + if next_word & 0x8000 != 0 { + self.should_continue = false; + } + Some(next_word) + } else { + None + } + } + } + let alphabet_table = if alphabet_table_addr == 0 { None } else { Some(memory.split_at(alphabet_table_addr).1) }; - let zwords = cut_string(zchars)?; - let consumed_length = zwords.len() * 2; - let zchars = zwords.iter() - .flat_map(|word| [ - (word >> 10) & 0x1f, - (word >> 5) & 0x1f, - word & 0x1f - ]) - .map(|word| ZChar(word as u8)) - .peekable(); - Some((ZsciiSequence { + + let zwords = ZWordIter::new(zchars)?; + let zchars = zwords + .flat_map(|word| [ + (word >> 10) & 0x1f, + (word >> 5) & 0x1f, + word & 0x1f + ]) + .map(|word| ZChar(word as u8)) + .peekable(); + Some(ZsciiSequence { zchars, subseq: None, alphabet_number: 0, alphabet_table, - }.collect(), consumed_length)) + }.collect()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn decode_hello_world() { + // Encoding steps: + // + // Hello World + // + // A1: 0d + // A0: 0a 11 11 14 + // 00 + // A1: 1c + // A0: 14 17 11 09 + // + // 04 0d 0a + // 11 11 14 + // 00 04 1c + // 14 17 11 + // 09 04 04 + // + // 00100 01101 01010 + // 10001 10001 10100 + // 00000 00100 11100 + // 10100 10111 10001 + // 01001 00100 00100 + // + // 0 00100 01101 01010 + // 0 10001 10001 10100 + // 0 00000 00100 11100 + // 0 10100 10111 10001 + // 1 01001 00100 00100 + // + // 00010001 10101010 + // 01000110 00110100 + // 00000000 10011100 + // 01010010 11110001 + // 10100100 10000100 + // + // 11 AA + // 46 34 + // 00 9C + // 52 F1 + // A4 84 + + // add a couple extra chars to ensure it leaves the cursor in the proper location + let memory_buf = &[ + 0x11, + 0xaa, + 0x46, + 0x34, + 0x00, + 0x9c, + 0x52, + 0xf1, + 0xa4, + 0x84, + 0x11, + 0xaa + ]; + let mut cur = SimpleCursor::new(memory_buf); + let out = decode_zchars(&mut cur, 0, 0, memory_buf); + assert_eq!(cur.read_const(), Some(&[0x11u8])); + assert_eq!(out, Some(zscii_from_bytes(b"Hello World"))); + } } +// pub fn decode_zchars_old_2( +// zchars: &[u8], +// alphabet_table_addr: usize, +// abbreviations_table_addr: usize, +// memory: &[u8], +// ) -> Option<(ZsciiString, usize)> { +// fn cut_string(zchars: &[u8]) -> Option> { +// let mut out = Vec::new(); +// for word in zchars.chunks_exact(2).map(|c| u16::from_be_bytes([c[0], c[1]])) { +// out.push(word); +// if 0x8000 & word != 0 { +// return Some(out); +// } +// } +// None +// } +// fn get_from_alphabet(alphabet_number: usize, ZChar(codepoint): ZChar, alphabet_table: Option<&[u8]>) -> ZsciiChar { + +// ZsciiChar( +// alphabet_table +// // ~'s indicate invalid characters (reserved values A2:6 and A2:7) +// .unwrap_or_else(|| br#"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ~~0123456789.,!?_#'"/\-:()"#) +// [26 * alphabet_number + codepoint as usize - 6] +// ) +// } +// struct ZsciiSequence<'a, T> +// where T: Iterator { +// zchars: std::iter::Peekable, +// subseq: Option>>, +// alphabet_number: usize, +// alphabet_table: Option<&'a [u8]> +// } +// impl<'a, T> Iterator for ZsciiSequence<'a, T> +// where T: Iterator { +// type Item = ZsciiChar; + +// fn next(&mut self) -> Option { +// match self.subseq.as_mut() { +// Some(subseq) => { +// let out = subseq.next(); +// if subseq.next().is_none() { self.subseq = None; } +// out +// }, +// None => { +// let char = self.zchars.next()?; +// match char { +// ZChar(0) => Some(ZsciiChar(32)), +// ZChar(1..=3) => { +// self.subseq = Some(Box::new(ZsciiSequence { +// zchars: todo!("abbreviations"), +// subseq: None, +// alphabet_number: self.alphabet_number, +// alphabet_table: self.alphabet_table +// })); +// self.next() +// }, +// ZChar(4..=5) => { +// if char == ZChar(5) && self.zchars.peek() == Some(&ZChar(6)) { +// Some(ZsciiChar(13)) +// } else if char == ZChar(5) && self.zchars.peek() == Some(&ZChar(7)) { +// let _ = self.zchars.next()?; +// let ZChar(z0) = self.zchars.next()?; +// let ZChar(z1) = self.zchars.next()?; +// Some(ZsciiChar((z0 << 5) | z1)) +// } else { +// self.alphabet_number = char.0 as usize - 3; +// let out = self.next(); +// self.alphabet_number = 0; +// out +// } +// }, +// ZChar(_) => Some(get_from_alphabet(self.alphabet_number, char, self.alphabet_table)) +// } +// } +// } +// } +// } + +// let alphabet_table = if alphabet_table_addr == 0 { None } else { Some(memory.split_at(alphabet_table_addr).1) }; +// let zwords = cut_string(zchars)?; +// let consumed_length = zwords.len() * 2; +// let zchars = zwords.iter() +// .flat_map(|word| [ +// (word >> 10) & 0x1f, +// (word >> 5) & 0x1f, +// word & 0x1f +// ]) +// .map(|word| ZChar(word as u8)) +// .peekable(); +// Some((ZsciiSequence { +// zchars, +// subseq: None, +// alphabet_number: 0, +// alphabet_table, +// }.collect(), consumed_length)) +// } + // pub fn decode_zchars_old( diff --git a/zing/src/utils/simplecursor.rs b/zing/src/utils/simplecursor.rs index 8b18f10..083d863 100644 --- a/zing/src/utils/simplecursor.rs +++ b/zing/src/utils/simplecursor.rs @@ -1,3 +1,5 @@ +use std::ops::Sub; + #[derive(Clone)] pub struct SimpleCursor<'a> { buffer: &'a [u8], @@ -72,13 +74,21 @@ impl<'a> SimpleCursor<'a> { } } + pub fn seek_relative(&mut self, distance: isize) -> Result<(), ()> { + if -distance > self.index as isize { + Err(()) + } else { + self.seek((self.index as isize + distance) as usize) + } + } + pub fn at_end(&self) -> bool { self.index == self.buffer_length() } - pub fn buf(&self) -> &[u8] { - self.buffer - } + pub fn buf(&self) -> &[u8] { self.buffer } + + pub fn idx(&self) -> usize { self.index } } impl<'a> Iterator for SimpleCursor<'a> {