From b68ef65f31469e667ba5da9e4c5edb98ef5ebe3d Mon Sep 17 00:00:00 2001 From: Lucas Schumacher Date: Sun, 8 Sep 2024 06:28:49 -0400 Subject: [PATCH] Clean up main and switch to modelA --- src/main.rs | 125 +++++++++++++++------------------------------------ src/model.rs | 80 +++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 90 deletions(-) diff --git a/src/main.rs b/src/main.rs index c6fe660..68a21c3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,101 +1,46 @@ // https://marknelson.us/posts/2014/10/19/data-compression-with-arithmetic-coding.html -use std::collections::HashMap; +#[allow(non_snake_case)] +mod modelA; mod bit_buffer; -use bit_buffer::{BitReader, BitWriter}; -mod model; -use model::{get_symbol, make_model, Model, ENGLISH}; +//mod model; +use modelA::ModelA; -fn encode(input: &[u8], model: &Model) -> Vec { - const HALF: u64 = 1 << (u64::BITS - 1); - const LOW_CONVERGE: u64 = 0b10 << (u64::BITS - 2); - const HIGH_CONVERGE: u64 = 0b01 << (u64::BITS - 2); - - let mut output = BitWriter::new(); - - let mut high = u64::MAX; - let mut low = u64::MIN; - let mut pending_bits = 0; - - for symbol in input { - let range = high - low; - let p = model.get(symbol).expect("Invalid/Unsupported data"); - high = low + (range as f64 * p.1) as u64; - low = low + (range as f64 * p.0) as u64; - loop { - if high < HALF { - output.write(false); - print!("0"); - while pending_bits > 0 { - output.write(true); - print!("1"); - pending_bits -= 1; - } - } else if low >= HALF { - output.write(true); - print!("1"); - while pending_bits > 0 { - output.write(true); - print!("0"); - pending_bits -= 1; - } - } else if low >= LOW_CONVERGE && high < HIGH_CONVERGE { - println!("BET"); - pending_bits += 1; - low <<= 1; - low &= HALF - 1; - high <<= 1; - high &= HALF + 1; - continue; - } else { - break; - } - low <<= 1; - high <<= 1; - high |= 1; - } - } - println!(""); - return output.flush(); -} - -fn decode(input: &[u8], model: &Model) -> Vec { - let mut high = 1.0; - let mut low = 0.0; - let mut output = vec![]; - for bit in BitReader::new(input) { - let diff = high - low; - if bit { - //print!("1"); - low = low + (diff / 2.0); - } else { - high = high - (diff / 2.0); - //print!("0"); - } - if let Some(symbol) = get_symbol(model, low, high) { - //println!("\nGot sym: {} from [{}, {})", symbol as char, low, high); - output.push(symbol); - let (slow, shigh) = model.get(&symbol).unwrap(); - let symdiff = *shigh - *slow; - high = (high - *slow) / symdiff; - low = (low - *slow) / symdiff; - } - } - - return output; -} fn main() { - let data = b"hello world-"; - println!("MODEL:"); - let model: Model = make_model(ENGLISH); + let data = b" +I'd just like to interject for a moment. What you're refering to as Linux, is in fact, GNU/Linux, or as I've re +aken to calling it, GNU plus Linux. Linux is not an operating system unto itself, but rather another free compo +a fully functioning GNU system made useful by the GNU corelibs, shell utilities and vital system components com +a full OS as defined by POSIX. + +Many computer users run a modified version of the GNU system every day, without realizing it. Through a peculia +f events, the version of GNU which is widely used today is often called Linux, and many of its users are not aw + it is basically the GNU system, developed by the GNU Project. + +There really is a Linux, and these people are using it, but it is just a part of the system they use. Linux is +el: the program in the system that allocates the machine's resources to the other programs that you run. The ke +an essential part of an operating system, but useless by itself; it can only function in the context of a compl +ating system. Linux is normally used in combination with the GNU operating system: the whole system is basicall +th Linux added, or GNU/Linux. All the so-called Linux distributions are really distributions of GNU/Linux! +"; + type CodeValue = u32; + println!("compressing..."); + let model: ModelA = ModelA::default(); + model.print_metrics(); println!(""); - let _enc = encode(data, &model); - let _dec = decode(&_enc, &model); - - println!("{}", String::from_utf8(_dec).unwrap()); + let enc = model.compress(data); + //println!("{}", enc.len()); + println!("ModelA compressed to {} bytes", enc.len()); println!( "Compression Ratio: {}", - data.len() as f64 / _enc.len() as f64 + data.len() as f64 / enc.len() as f64 ); + //println!("--------- Compressed data ---------\n{}", dump_hex(&enc)); + println!(""); + + println!("decompressing..."); + let model: ModelA = ModelA::default(); + let dec = model.decompress(&enc).unwrap(); + println!("{}", String::from_utf8_lossy(&dec)); } diff --git a/src/model.rs b/src/model.rs index c7a29ea..c83597d 100644 --- a/src/model.rs +++ b/src/model.rs @@ -1,5 +1,6 @@ use std::collections::HashMap; pub type Model = HashMap; +use crate::bit_buffer::{BitReader, BitWriter}; pub fn get_symbol(model: &Model, low: f64, high: f64) -> Option { for (symbol, (start, end)) in model { @@ -51,3 +52,82 @@ pub const ENGLISH: &[(u8, f64)] = &[ (b' ', 0.01), (b'-', 0.02), ]; + +fn encode(input: &[u8], model: &Model) -> Vec { + const HALF: u64 = 1 << (u64::BITS - 1); + const LOW_CONVERGE: u64 = 0b10 << (u64::BITS - 2); + const HIGH_CONVERGE: u64 = 0b01 << (u64::BITS - 2); + + let mut output = BitWriter::new(); + + let mut high = u64::MAX; + let mut low = u64::MIN; + let mut pending_bits = 0; + + for symbol in input { + let range = high - low; + let p = model.get(symbol).expect("Invalid/Unsupported data"); + high = low + (range as f64 * p.1) as u64; + low = low + (range as f64 * p.0) as u64; + loop { + if high < HALF { + output.write(false); + print!("0"); + while pending_bits > 0 { + output.write(true); + print!("1"); + pending_bits -= 1; + } + } else if low >= HALF { + output.write(true); + print!("1"); + while pending_bits > 0 { + output.write(true); + print!("0"); + pending_bits -= 1; + } + } else if low >= LOW_CONVERGE && high < HIGH_CONVERGE { + println!("BET"); + pending_bits += 1; + low <<= 1; + low &= HALF - 1; + high <<= 1; + high &= HALF + 1; + continue; + } else { + break; + } + low <<= 1; + high <<= 1; + high |= 1; + } + } + println!(""); + return output.flush(); +} + +fn decode(input: &[u8], model: &Model) -> Vec { + let mut high = 1.0; + let mut low = 0.0; + let mut output = vec![]; + for bit in BitReader::new(input) { + let diff = high - low; + if bit { + //print!("1"); + low = low + (diff / 2.0); + } else { + high = high - (diff / 2.0); + //print!("0"); + } + if let Some(symbol) = get_symbol(model, low, high) { + //println!("\nGot sym: {} from [{}, {})", symbol as char, low, high); + output.push(symbol); + let (slow, shigh) = model.get(&symbol).unwrap(); + let symdiff = *shigh - *slow; + high = (high - *slow) / symdiff; + low = (low - *slow) / symdiff; + } + } + + return output; +}