text decoding, take 3! (simplecursor edition)

7 months ago · abb9713fce
parent ffe9706f73
commit abb9713fce
2 changed files with 232 additions and 33 deletions
--- a/zing/src/encoding/text.rs
+++ b/zing/src/encoding/text.rs
@ -1,41 +1,38 @@
+use crate::utils::simplecursor::SimpleCursor;
+
 /// 5 bits
-#[derive(Clone, Copy, PartialEq)]
+#[derive(Clone, Copy, PartialEq, Debug)]
 struct ZChar(u8);

 /// technically 10 bits, but top two unused so they are dropped
-#[derive(PartialEq, Clone, Copy)]
+#[derive(PartialEq, Clone, Copy, Debug)]
 pub struct ZsciiChar(u8);

 pub type ZsciiString = Vec<ZsciiChar>;

+/// Wraps every byte of `bytes` in a `ZsciiChar` and collects the result into a `ZsciiString`.
+///
+/// The bytes are used as-is; no Z-character decoding happens here.
+fn zscii_from_bytes(bytes: &[u8]) -> ZsciiString {
+    // `iter().copied()` is the idiomatic way to walk a borrowed slice of `Copy` values.
+    bytes.iter().copied().map(ZsciiChar).collect()
+}
+
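 /// Decodes a Z-character string read through `zchars`, advancing the cursor past the words it
 /// consumes.
 ///
 /// Returns the decoded ZsciiString wrapped in an `Option`. The number of bytes consumed is no
 /// longer returned separately; it is reflected in the cursor's position after the call.
 ///
 /// NOTE(review): the previous version returned `None` when the slice terminated before the
 /// string ended; confirm whether that still holds now that words are pulled from the cursor.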
 pub fn decode_zchars(
-    zchars: &[u8], 
+    zchars: &mut SimpleCursor,
    alphabet_table_addr: usize,
-    abbreviations_table_addr: usize,
+    abbreviations_table: usize,
    memory: &[u8],
-) -> Option<(ZsciiString, usize)> {
-    fn cut_string(zchars: &[u8]) -> Option<Vec<u16>> {
-        let mut out = Vec::new();
-        for word in zchars.chunks_exact(2).map(|c| u16::from_be_bytes([c[0], c[1]])) {
-            out.push(word);
-            if 0x8000 & word != 0 {
-                return Some(out);
-            }
-        }
-        None
-    }
+) -> Option<ZsciiString> {
    fn get_from_alphabet(alphabet_number: usize, ZChar(codepoint): ZChar, alphabet_table: Option<&[u8]>) -> ZsciiChar {

        ZsciiChar(
            alphabet_table
            // ~'s indicate invalid characters (reserved values A2:6 and A2:7)
-            .unwrap_or_else(|| br#"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ~~0123456789.,!?_#'"/\-:()"#) 
+            .unwrap_or(br#"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ~~0123456789.,!?_#'"/\-:()"#) 
            [26 * alphabet_number + codepoint as usize - 6]
        )
    }
+
    struct ZsciiSequence<'a, T>
    where T: Iterator<Item = ZChar> {
        zchars: std::iter::Peekable<T>,
@ -60,7 +57,7 @@ pub fn decode_zchars(
                        ZChar(0) => Some(ZsciiChar(32)),
                        ZChar(1..=3) => {
                            self.subseq = Some(Box::new(ZsciiSequence {
-                                zchars: todo!(),
+                                zchars: todo!("abbreviations"),
                                subseq: None,
                                alphabet_number: self.alphabet_number,
                                alphabet_table: self.alphabet_table
@ -89,25 +86,217 @@ pub fn decode_zchars(
        }
    }

+    /// Iterator that pulls big-endian 16-bit words through a borrowed `SimpleCursor`.
+    ///
+    /// A word whose top bit (`0x8000`) is set is still yielded, but it is the last one:
+    /// every later call to `next` returns `None` without reading from the cursor again.
+    struct ZWordIter<'a, 'b> {
+        cursor: &'a mut SimpleCursor<'b>,
+        /// Stays `true` until a word with the top bit set has been yielded.
+        should_continue: bool,
+    }
+
+    impl<'a, 'b> ZWordIter<'a, 'b> {
+        /// Creates an iterator that reads through `cursor`.
+        ///
+        /// Currently this always returns `Some`; the `Option` is part of the signature the
+        /// caller relies on.
+        fn new(cursor: &'a mut SimpleCursor<'b>) -> Option<Self> {
+            let words = Self { cursor, should_continue: true };
+            Some(words)
+        }
+    }
+
+    impl<'a, 'b> Iterator for ZWordIter<'a, 'b> {
+        type Item = u16;
+
+        fn next(&mut self) -> Option<Self::Item> {
+            if !self.should_continue {
+                return None;
+            }
+            // `?` ends the iteration when the cursor cannot supply another word.
+            let word = u16::from_be_bytes(*self.cursor.read_const()?);
+            // Keep going only while the top bit is clear.
+            self.should_continue = (word & 0x8000) == 0;
+            Some(word)
+        }
+    }
+
    let alphabet_table = if alphabet_table_addr == 0 { None } else { Some(memory.split_at(alphabet_table_addr).1) };
-    let zwords = cut_string(zchars)?;
-    let consumed_length = zwords.len() * 2;
-    let zchars = zwords.iter()
-        .flat_map(|word| [
-            (word >> 10) & 0x1f,
-            (word >> 5) & 0x1f,
-            word & 0x1f
-        ])
-        .map(|word| ZChar(word as u8))
-        .peekable();
-    Some((ZsciiSequence {
+
+    let zwords = ZWordIter::new(zchars)?;
+    let zchars = zwords
+                .flat_map(|word| [
+                    (word >> 10) & 0x1f,
+                    (word >> 5) & 0x1f,
+                    word & 0x1f
+                ])
+                .map(|word| ZChar(word as u8))
+                .peekable();
+    Some(ZsciiSequence {
        zchars,
        subseq: None,
        alphabet_number: 0,
        alphabet_table,
-    }.collect(), consumed_length))
+    }.collect())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Decodes a hand-encoded "Hello World" and checks where the cursor ends up.
+    #[test]
+    fn decode_hello_world() {
+        // Hand-encoding of "Hello World".
+        //
+        // Z-characters per alphabet:
+        //   A1: 0d             H
+        //   A0: 0a 11 11 14    e l l o
+        //   00                 (space)
+        //   A1: 1c             W
+        //   A0: 14 17 11 09    o r l d
+        //
+        // With an 04 in front of each A1 character, grouped three to a word and the
+        // last word filled out with 04 04:
+        //   04 0d 0a
+        //   11 11 14
+        //   00 04 1c
+        //   14 17 11
+        //   09 04 04
+        //
+        // As 5-bit fields:
+        //   00100 01101 01010
+        //   10001 10001 10100
+        //   00000 00100 11100
+        //   10100 10111 10001
+        //   01001 00100 00100
+        //
+        // With the leading bit added (1 only on the final word):
+        //   0 00100 01101 01010
+        //   0 10001 10001 10100
+        //   0 00000 00100 11100
+        //   0 10100 10111 10001
+        //   1 01001 00100 00100
+        //
+        // Regrouped into bytes:
+        //   00010001 10101010  ->  11 AA
+        //   01000110 00110100  ->  46 34
+        //   00000000 10011100  ->  00 9C
+        //   01010010 11110001  ->  52 F1
+        //   10100100 10000100  ->  A4 84
+
+        // Two extra bytes follow the string so the test can check where the cursor is left.
+        let memory_buf = &[
+            0x11, 0xaa,
+            0x46, 0x34,
+            0x00, 0x9c,
+            0x52, 0xf1,
+            0xa4, 0x84,
+            // trailing bytes, not part of the encoded string
+            0x11, 0xaa,
+        ];
+        let mut cursor = SimpleCursor::new(memory_buf);
+        let decoded = decode_zchars(&mut cursor, 0, 0, memory_buf);
+        // The next byte read must be the first trailing byte.
+        assert_eq!(cursor.read_const(), Some(&[0x11u8]));
+        assert_eq!(decoded, Some(zscii_from_bytes(b"Hello World")));
+    }
 }

+// pub fn decode_zchars_old_2(
+//     zchars: &[u8], 
+//     alphabet_table_addr: usize, 
+//     abbreviations_table_addr: usize,
+//     memory: &[u8],
+// ) -> Option<(ZsciiString, usize)> {
+//     fn cut_string(zchars: &[u8]) -> Option<Vec<u16>> {
+//         let mut out = Vec::new();
+//         for word in zchars.chunks_exact(2).map(|c| u16::from_be_bytes([c[0], c[1]])) {
+//             out.push(word);
+//             if 0x8000 & word != 0 {
+//                 return Some(out);
+//             }
+//         }
+//         None
+//     }
+//     fn get_from_alphabet(alphabet_number: usize, ZChar(codepoint): ZChar, alphabet_table: Option<&[u8]>) -> ZsciiChar {
+
+//         ZsciiChar(
+//             alphabet_table
+//             // ~'s indicate invalid characters (reserved values A2:6 and A2:7)
+//             .unwrap_or_else(|| br#"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ~~0123456789.,!?_#'"/\-:()"#) 
+//             [26 * alphabet_number + codepoint as usize - 6]
+//         )
+//     }
+//     struct ZsciiSequence<'a, T>
+//     where T: Iterator<Item = ZChar> {
+//         zchars: std::iter::Peekable<T>,
+//         subseq: Option<Box<ZsciiSequence<'a, T>>>,
+//         alphabet_number: usize,
+//         alphabet_table: Option<&'a [u8]>
+//     }
+//     impl<'a, T> Iterator for ZsciiSequence<'a, T>
+//     where T: Iterator<Item = ZChar> {
+//         type Item = ZsciiChar;
+
+//         fn next(&mut self) -> Option<Self::Item> {
+//             match self.subseq.as_mut() {
+//                 Some(subseq) => {
+//                     let out = subseq.next();
+//                     if subseq.next().is_none() { self.subseq = None; }
+//                     out
+//                 },
+//                 None => {
+//                     let char = self.zchars.next()?;
+//                     match char {
+//                         ZChar(0) => Some(ZsciiChar(32)),
+//                         ZChar(1..=3) => {
+//                             self.subseq = Some(Box::new(ZsciiSequence {
+//                                 zchars: todo!("abbreviations"),
+//                                 subseq: None,
+//                                 alphabet_number: self.alphabet_number,
+//                                 alphabet_table: self.alphabet_table
+//                             }));
+//                             self.next()
+//                         },
+//                         ZChar(4..=5) => {
+//                             if char == ZChar(5) && self.zchars.peek() == Some(&ZChar(6)) {
+//                                 Some(ZsciiChar(13))
+//                             } else if char == ZChar(5) && self.zchars.peek() == Some(&ZChar(7)) {
+//                                 let _ = self.zchars.next()?;
+//                                 let ZChar(z0) = self.zchars.next()?;
+//                                 let ZChar(z1) = self.zchars.next()?;
+//                                 Some(ZsciiChar((z0 << 5) | z1))
+//                             } else {
+//                                 self.alphabet_number = char.0 as usize - 3;
+//                                 let out = self.next();
+//                                 self.alphabet_number = 0;
+//                                 out
+//                             }
+//                         },
+//                         ZChar(_) => Some(get_from_alphabet(self.alphabet_number, char, self.alphabet_table))
+//                     }
+//                 }
+//             }
+//         }
+//     }
+
+//     let alphabet_table = if alphabet_table_addr == 0 { None } else { Some(memory.split_at(alphabet_table_addr).1) };
+//     let zwords = cut_string(zchars)?;
+//     let consumed_length = zwords.len() * 2;
+//     let zchars = zwords.iter()
+//         .flat_map(|word| [
+//             (word >> 10) & 0x1f,
+//             (word >> 5) & 0x1f,
+//             word & 0x1f
+//         ])
+//         .map(|word| ZChar(word as u8))
+//         .peekable();
+//     Some((ZsciiSequence {
+//         zchars,
+//         subseq: None,
+//         alphabet_number: 0,
+//         alphabet_table,
+//     }.collect(), consumed_length))
+// }
+


 // pub fn decode_zchars_old(
--- a/zing/src/utils/simplecursor.rs
+++ b/zing/src/utils/simplecursor.rs
@ -1,3 +1,5 @@
+use std::ops::Sub;
+
 #[derive(Clone)]
 pub struct SimpleCursor<'a> {
    buffer: &'a [u8],
@ -72,13 +74,21 @@ impl<'a> SimpleCursor<'a> {
        }
    }

+    /// Moves the cursor `distance` bytes from its current index; negative values move backwards.
+    ///
+    /// Returns `Err(())` when the target index would fall below zero or cannot be represented
+    /// as a `usize`. Otherwise returns whatever `seek` reports for the computed index.
+    pub fn seek_relative(&mut self, distance: isize) -> Result<(), ()> {
+        // Stay in unsigned arithmetic: negating `distance` overflows for `isize::MIN`, and
+        // `index as isize + distance` can overflow too. The checked operations yield `None`.
+        let target = if distance < 0 {
+            self.index.checked_sub(distance.unsigned_abs())
+        } else {
+            self.index.checked_add(distance.unsigned_abs())
+        };
+        match target {
+            Some(index) => self.seek(index),
+            None => Err(()),
+        }
+    }
+
    pub fn at_end(&self) -> bool {
        self.index == self.buffer_length()
    }

-    pub fn buf(&self) -> &[u8] {
-        self.buffer
-    }
+    /// Returns the whole underlying buffer, regardless of the current index.
+    ///
+    /// The slice carries the buffer's own lifetime `'a`, so it is not tied to this borrow
+    /// of the cursor.
+    pub fn buf(&self) -> &'a [u8] { self.buffer }
+
+    /// Returns the cursor's current index.
+    pub fn idx(&self) -> usize { self.index }
 }

 impl<'a> Iterator for SimpleCursor<'a> {