// Source of `src/main.rs` as added by commit
// 6da0e73c5eddcfbf8d6e5367119a2dbf33e8493a
// Author: Lucas Schumacher
// Date:   Sun Aug 18 10:48:06 2024 -0400
//
//     First commit
//
// The same commit also adds:
//
//   .gitignore
//       /target
//
//   Cargo.toml
//       [package]
//       name = "sac"
//       version = "0.1.0"
//       edition = "2021"
//
//       [dependencies]
//
// Algorithm reference:
// https://marknelson.us/posts/2014/10/19/data-compression-with-arithmetic-coding.html
use std::collections::HashMap;

/// Maps each symbol to its half-open probability interval `[start, end)`.
type Model = HashMap<u8, (f64, f64)>;

/// Symbol that marks the end of a message; decoding stops when it is produced
/// and the terminator itself is not emitted.
const TERMINATOR: u8 = b'-';

/// Failure report from [`decode_bytes`].
#[derive(Debug, Clone, PartialEq)]
struct DecodeError {
    /// Symbols that were decoded before the failure.
    partial: Vec<u8>,
    /// The scaled position for which decoding could not continue.
    d: f64,
}

/// Finds the symbol whose interval contains `d`, together with that interval.
///
/// Returns `None` when no interval contains `d` (this includes `NaN`, which
/// fails every comparison).
fn find_interval(model: &Model, d: f64) -> Option<(u8, (f64, f64))> {
    // Brute force: the model is small, so a linear scan over all intervals is
    // acceptable. Iteration order of the map does not matter as long as the
    // intervals are disjoint, which `make_model` guarantees for non-negative
    // probabilities.
    model
        .iter()
        .find(|&(_, &(start, end))| start <= d && d < end)
        .map(|(&symbol, &interval)| (symbol, interval))
}

/// Returns the symbol whose interval `[start, end)` contains `d`, if any.
fn get_symbol(model: &Model, d: f64) -> Option<u8> {
    find_interval(model, d).map(|(symbol, _)| symbol)
}

/// Encodes `data` into a single number in `[0, 1)` using `model`.
///
/// The working interval `[low, high)` starts as `[0, 1)` and is narrowed to the
/// sub-interval of each symbol in turn; the midpoint of the final interval is
/// returned. Empty input therefore yields `0.5`.
///
/// Every symbol multiplies the interval width by that symbol's probability, so
/// the width eventually drops below what an `f64` can resolve; only short
/// messages can be expected to survive a round trip.
///
/// # Panics
///
/// Panics if `data` contains a byte that has no entry in `model`.
fn encode(data: &[u8], model: &Model) -> f64 {
    let mut high: f64 = 1.0;
    let mut low: f64 = 0.0;
    for symbol in data {
        let &(start, end) = model.get(symbol).expect("Invalid/Unsupported data");
        let range = high - low;
        // `high` must be computed before `low` is overwritten: both are
        // relative to the old lower bound.
        high = low + range * end;
        low += range * start;
    }
    low + (high - low) / 2.0
}

/// Decodes `message` with `model` and returns the symbols that precede the
/// [`TERMINATOR`].
///
/// # Errors
///
/// Returns a [`DecodeError`] carrying the symbols decoded so far when
/// * no interval of `model` contains the current scaled position, or
/// * narrowing to the decoded symbol leaves the working interval unchanged
///   (the symbol spans the whole range or floating-point precision is
///   exhausted), which would otherwise repeat the same symbol forever.
fn decode_bytes(message: f64, model: &Model) -> Result<Vec<u8>, DecodeError> {
    let mut decoded = Vec::new();
    let mut high: f64 = 1.0;
    let mut low: f64 = 0.0;
    loop {
        let range = high - low;
        // Position of the message inside the current interval, scaled to [0, 1).
        let d = (message - low) / range;
        let (symbol, (start, end)) = match find_interval(model, d) {
            Some(hit) => hit,
            None => return Err(DecodeError { partial: decoded, d }),
        };
        if symbol == TERMINATOR {
            return Ok(decoded);
        }
        let next_high = low + range * end;
        let next_low = low + range * start;
        if next_high == high && next_low == low {
            // No progress is possible any more; bail out instead of looping.
            return Err(DecodeError { partial: decoded, d });
        }
        decoded.push(symbol);
        high = next_high;
        low = next_low;
    }
}

/// Renders decoded bytes the way the decoder prints them: each byte becomes
/// the `char` with the same code point.
fn render(bytes: &[u8]) -> String {
    bytes.iter().map(|&byte| byte as char).collect()
}

/// Decodes `message` with `model` and prints the result to stdout, followed by
/// a newline.
///
/// On failure the symbols decoded so far are still printed (followed by a
/// newline) and a `Decode error` line is written to stderr.
fn decode(message: f64, model: &Model) {
    match decode_bytes(message, model) {
        Ok(bytes) => println!("{}", render(&bytes)),
        Err(err) => {
            println!("{}", render(&err.partial));
            eprintln!("Decode error: d={}", err.d);
        }
    }
}

/// Builds a [`Model`] by stacking the given probabilities end to end, starting
/// at `0.0`, in slice order.
///
/// Each generated interval is also printed to stdout as `symbol: [start, end)`.
/// If a symbol occurs more than once, the later entry replaces the earlier one
/// in the map.
fn make_model(probabilities: &[(u8, f64)]) -> Model {
    let mut model = HashMap::with_capacity(probabilities.len());
    // Running upper bound; it is a floating-point sum, so the final value may
    // differ from the exact mathematical total by rounding error.
    let mut end: f64 = 0.0;
    for &(symbol, probability) in probabilities {
        let start = end;
        end = start + probability;
        model.insert(symbol, (start, end));
        println!("{}: [{}, {})", symbol as char, start, end);
    }
    model
}

/// Symbol probabilities for lowercase English letters, the space character and
/// the message terminator. The listed values add up to 1.
const ENGLISH: &[(u8, f64)] = &[
    (b'a', 0.08),
    (b'b', 0.01),
    (b'c', 0.02),
    (b'd', 0.04),
    (b'e', 0.12),
    (b'f', 0.02),
    (b'g', 0.02),
    (b'h', 0.06),
    (b'i', 0.07),
    (b'j', 0.01),
    (b'k', 0.01),
    (b'l', 0.04),
    (b'm', 0.02),
    (b'n', 0.06),
    (b'o', 0.07),
    (b'p', 0.01),
    (b'q', 0.01),
    (b'r', 0.06),
    (b's', 0.06),
    (b't', 0.09),
    (b'u', 0.02),
    (b'v', 0.01),
    (b'w', 0.02),
    (b'x', 0.01),
    (b'y', 0.02),
    (b'z', 0.01),
    (b' ', 0.01),
    (TERMINATOR, 0.02),
];

/// Builds the English model, encodes a sample message, prints the code value
/// and decodes it again.
fn main() {
    let model: Model = make_model(ENGLISH);
    let message = encode(b"hello world-", &model);
    println!("{message}");
    decode(message, &model);
}