text decoding, take 3! (simplecursor edition)

main
aprzn 8 months ago
parent ffe9706f73
commit abb9713fce

@ -1,41 +1,38 @@
use crate::utils::simplecursor::SimpleCursor;
/// 5 bits
#[derive(Clone, Copy, PartialEq)]
#[derive(Clone, Copy, PartialEq, Debug)]
struct ZChar(u8);
/// technically 10 bits, but top two unused so they are dropped
#[derive(PartialEq, Clone, Copy)]
#[derive(PartialEq, Clone, Copy, Debug)]
pub struct ZsciiChar(u8);
pub type ZsciiString = Vec<ZsciiChar>;
/// Builds a `ZsciiString` by wrapping every byte of `bytes`, in order, in a `ZsciiChar`.
///
/// No translation is applied: each byte value is used verbatim as the character code.
/// An empty slice yields an empty string.
fn zscii_from_bytes(bytes: &[u8]) -> ZsciiString {
    // `iter().copied()` states the intent directly: `u8` is `Copy`, and `into_iter()` on a
    // slice reference only ever yields references anyway.
    bytes.iter().copied().map(ZsciiChar).collect()
}
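/// Decodes one encoded string, reading 16-bit words from the `zchars` cursor up to and
/// including the first word whose top bit (0x8000) is set.
///
/// Returns:
/// - the decoded `ZsciiString`, wrapped in an `Option`
/// - no separate byte count: the cursor itself is advanced as words are read, so the caller
///   can carry on reading from it afterwards
///
/// NOTE(review): if the cursor runs out before a terminating word is seen, word iteration
/// simply stops, so a truncated string appears to decode to `Some(..)` rather than `None` —
/// confirm whether that is intended.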
pub fn decode_zchars(
zchars: &[u8],
alphabet_table_addr: usize,
abbreviations_table_addr: usize,
zchars: &mut SimpleCursor,
alphabet_table_addr: usize,
abbreviations_table: usize,
memory: &[u8],
) -> Option<(ZsciiString, usize)> {
fn cut_string(zchars: &[u8]) -> Option<Vec<u16>> {
let mut out = Vec::new();
for word in zchars.chunks_exact(2).map(|c| u16::from_be_bytes([c[0], c[1]])) {
out.push(word);
if 0x8000 & word != 0 {
return Some(out);
}
}
None
}
) -> Option<ZsciiString> {
fn get_from_alphabet(alphabet_number: usize, ZChar(codepoint): ZChar, alphabet_table: Option<&[u8]>) -> ZsciiChar {
ZsciiChar(
alphabet_table
// ~'s indicate invalid characters (reserved values A2:6 and A2:7)
.unwrap_or_else(|| br#"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ~~0123456789.,!?_#'"/\-:()"#)
.unwrap_or(br#"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ~~0123456789.,!?_#'"/\-:()"#)
[26 * alphabet_number + codepoint as usize - 6]
)
}
struct ZsciiSequence<'a, T>
where T: Iterator<Item = ZChar> {
zchars: std::iter::Peekable<T>,
@ -60,7 +57,7 @@ pub fn decode_zchars(
ZChar(0) => Some(ZsciiChar(32)),
ZChar(1..=3) => {
self.subseq = Some(Box::new(ZsciiSequence {
zchars: todo!(),
zchars: todo!("abbreviations"),
subseq: None,
alphabet_number: self.alphabet_number,
alphabet_table: self.alphabet_table
@ -89,25 +86,217 @@ pub fn decode_zchars(
}
}
/// Iterator over the big-endian 16-bit words of an encoded string, read through a borrowed
/// cursor. Iteration ends after the first word whose top bit (0x8000) is set.
struct ZWordIter<'a, 'b> {
/// Cursor the words are read from. It is only borrowed, so whatever the reads do to the
/// cursor remains visible to the owner once the iterator is dropped.
cursor: &'a mut SimpleCursor<'b>,
/// `true` until a word with its top bit set has been yielded; once `false`, `next`
/// returns `None` without touching the cursor.
should_continue: bool,
}
impl<'a, 'b> ZWordIter<'a, 'b> {
/// Wraps `cursor` in a word iterator that is ready to yield.
///
/// Nothing is read from the cursor here, and this body always returns `Some`.
fn new(cursor: &'a mut SimpleCursor<'b>) -> Option<Self> {
Some(Self { cursor, should_continue: true })
}
}
impl<'a, 'b> Iterator for ZWordIter<'a, 'b> {
    type Item = u16;

    /// Yields the next big-endian word from the cursor.
    ///
    /// Returns `None` once a word with its top bit set has already been handed out, or
    /// when the cursor cannot supply another two bytes.
    fn next(&mut self) -> Option<Self::Item> {
        // The terminating word has already been yielded: do not touch the cursor again.
        if !self.should_continue {
            return None;
        }

        let word = u16::from_be_bytes(*self.cursor.read_const()?);
        // A set top bit marks the final word; it is still yielded, but nothing follows it.
        self.should_continue = (word & 0x8000) == 0;
        Some(word)
    }
}
let alphabet_table = if alphabet_table_addr == 0 { None } else { Some(memory.split_at(alphabet_table_addr).1) };
let zwords = cut_string(zchars)?;
let consumed_length = zwords.len() * 2;
let zchars = zwords.iter()
.flat_map(|word| [
(word >> 10) & 0x1f,
(word >> 5) & 0x1f,
word & 0x1f
])
.map(|word| ZChar(word as u8))
.peekable();
Some((ZsciiSequence {
let zwords = ZWordIter::new(zchars)?;
let zchars = zwords
.flat_map(|word| [
(word >> 10) & 0x1f,
(word >> 5) & 0x1f,
word & 0x1f
])
.map(|word| ZChar(word as u8))
.peekable();
Some(ZsciiSequence {
zchars,
subseq: None,
alphabet_number: 0,
alphabet_table,
}.collect(), consumed_length))
}.collect())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes a hand-assembled "Hello World" and checks both the decoded text and the
    /// position the cursor is left at.
    #[test]
    fn decode_hello_world() {
        // How the fixture was assembled (default alphabet table):
        //
        //   text:     Hello World
        //
        //   z-chars:  H     -> 04 (A1 for the next z-char) 0d
        //             ello  -> 0a 11 11 14
        //             space -> 00
        //             W     -> 04 (A1 for the next z-char) 1c
        //             orld  -> 14 17 11 09
        //             pad   -> 04 04
        //
        //   Packed three z-chars per 16-bit word; the leading bit is set on the last word only:
        //
        //     z-chars     end  5-bit fields         binary               bytes
        //     04 0d 0a    0    00100 01101 01010    00010001 10101010    11 AA
        //     11 11 14    0    10001 10001 10100    01000110 00110100    46 34
        //     00 04 1c    0    00000 00100 11100    00000000 10011100    00 9C
        //     14 17 11    0    10100 10111 10001    01010010 11110001    52 F1
        //     09 04 04    1    01001 00100 00100    10100100 10000100    A4 84
        let encoded = &[
            0x11, 0xaa,
            0x46, 0x34,
            0x00, 0x9c,
            0x52, 0xf1,
            0xa4, 0x84,
            // Two bytes past the end of the string, so the test can tell where the cursor
            // was left once decoding finished.
            0x11, 0xaa,
        ];

        let mut cursor = SimpleCursor::new(encoded);
        let decoded = decode_zchars(&mut cursor, 0, 0, encoded);

        // The next byte available must be the first one after the terminating word.
        assert_eq!(cursor.read_const(), Some(&[0x11u8]));
        assert_eq!(decoded, Some(zscii_from_bytes(b"Hello World")));
    }
}
// pub fn decode_zchars_old_2(
// zchars: &[u8],
// alphabet_table_addr: usize,
// abbreviations_table_addr: usize,
// memory: &[u8],
// ) -> Option<(ZsciiString, usize)> {
// fn cut_string(zchars: &[u8]) -> Option<Vec<u16>> {
// let mut out = Vec::new();
// for word in zchars.chunks_exact(2).map(|c| u16::from_be_bytes([c[0], c[1]])) {
// out.push(word);
// if 0x8000 & word != 0 {
// return Some(out);
// }
// }
// None
// }
// fn get_from_alphabet(alphabet_number: usize, ZChar(codepoint): ZChar, alphabet_table: Option<&[u8]>) -> ZsciiChar {
// ZsciiChar(
// alphabet_table
// // ~'s indicate invalid characters (reserved values A2:6 and A2:7)
// .unwrap_or_else(|| br#"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ~~0123456789.,!?_#'"/\-:()"#)
// [26 * alphabet_number + codepoint as usize - 6]
// )
// }
// struct ZsciiSequence<'a, T>
// where T: Iterator<Item = ZChar> {
// zchars: std::iter::Peekable<T>,
// subseq: Option<Box<ZsciiSequence<'a, T>>>,
// alphabet_number: usize,
// alphabet_table: Option<&'a [u8]>
// }
// impl<'a, T> Iterator for ZsciiSequence<'a, T>
// where T: Iterator<Item = ZChar> {
// type Item = ZsciiChar;
// fn next(&mut self) -> Option<Self::Item> {
// match self.subseq.as_mut() {
// Some(subseq) => {
// let out = subseq.next();
// if subseq.next().is_none() { self.subseq = None; }
// out
// },
// None => {
// let char = self.zchars.next()?;
// match char {
// ZChar(0) => Some(ZsciiChar(32)),
// ZChar(1..=3) => {
// self.subseq = Some(Box::new(ZsciiSequence {
// zchars: todo!("abbreviations"),
// subseq: None,
// alphabet_number: self.alphabet_number,
// alphabet_table: self.alphabet_table
// }));
// self.next()
// },
// ZChar(4..=5) => {
// if char == ZChar(5) && self.zchars.peek() == Some(&ZChar(6)) {
// Some(ZsciiChar(13))
// } else if char == ZChar(5) && self.zchars.peek() == Some(&ZChar(7)) {
// let _ = self.zchars.next()?;
// let ZChar(z0) = self.zchars.next()?;
// let ZChar(z1) = self.zchars.next()?;
// Some(ZsciiChar((z0 << 5) | z1))
// } else {
// self.alphabet_number = char.0 as usize - 3;
// let out = self.next();
// self.alphabet_number = 0;
// out
// }
// },
// ZChar(_) => Some(get_from_alphabet(self.alphabet_number, char, self.alphabet_table))
// }
// }
// }
// }
// }
// let alphabet_table = if alphabet_table_addr == 0 { None } else { Some(memory.split_at(alphabet_table_addr).1) };
// let zwords = cut_string(zchars)?;
// let consumed_length = zwords.len() * 2;
// let zchars = zwords.iter()
// .flat_map(|word| [
// (word >> 10) & 0x1f,
// (word >> 5) & 0x1f,
// word & 0x1f
// ])
// .map(|word| ZChar(word as u8))
// .peekable();
// Some((ZsciiSequence {
// zchars,
// subseq: None,
// alphabet_number: 0,
// alphabet_table,
// }.collect(), consumed_length))
// }
// pub fn decode_zchars_old(

@ -1,3 +1,5 @@
use std::ops::Sub;
#[derive(Clone)]
pub struct SimpleCursor<'a> {
buffer: &'a [u8],
@ -72,13 +74,21 @@ impl<'a> SimpleCursor<'a> {
}
}
/// Moves the cursor by `distance` positions relative to its current index: negative values
/// move backwards, positive values forwards.
///
/// The target index is computed with checked unsigned arithmetic, so extreme distances
/// (including `isize::MIN`) cannot overflow or wrap on their way to `seek`.
///
/// # Errors
///
/// Returns `Err(())` when the target would lie before index 0 or does not fit in a `usize`.
/// Otherwise the result of `seek` on the target index is returned unchanged.
pub fn seek_relative(&mut self, distance: isize) -> Result<(), ()> {
    // `unsigned_abs` is well defined even for `isize::MIN`, unlike negating the value.
    let magnitude = distance.unsigned_abs();
    let target = if distance < 0 {
        self.index.checked_sub(magnitude)
    } else {
        self.index.checked_add(magnitude)
    };
    match target {
        Some(index) => self.seek(index),
        None => Err(()),
    }
}
/// Returns `true` when the cursor's index equals `buffer_length()`
/// (presumably the length of the underlying buffer — confirm against its definition).
pub fn at_end(&self) -> bool {
self.index == self.buffer_length()
}
pub fn buf(&self) -> &[u8] {
self.buffer
}
/// Returns the cursor's whole `buffer` slice, regardless of the current index.
pub fn buf(&self) -> &[u8] { self.buffer }
/// Returns the cursor's current index.
pub fn idx(&self) -> usize { self.index }
}
impl<'a> Iterator for SimpleCursor<'a> {

Loading…
Cancel
Save