Clean up main and switch to modelA
This commit is contained in:
parent
5eac451458
commit
b68ef65f31
125
src/main.rs
125
src/main.rs
@ -1,101 +1,46 @@
|
||||
// https://marknelson.us/posts/2014/10/19/data-compression-with-arithmetic-coding.html
|
||||
use std::collections::HashMap;
|
||||
#[allow(non_snake_case)]
|
||||
mod modelA;
|
||||
|
||||
mod bit_buffer;
|
||||
use bit_buffer::{BitReader, BitWriter};
|
||||
mod model;
|
||||
use model::{get_symbol, make_model, Model, ENGLISH};
|
||||
//mod model;
|
||||
use modelA::ModelA;
|
||||
|
||||
fn encode(input: &[u8], model: &Model) -> Vec<u8> {
|
||||
const HALF: u64 = 1 << (u64::BITS - 1);
|
||||
const LOW_CONVERGE: u64 = 0b10 << (u64::BITS - 2);
|
||||
const HIGH_CONVERGE: u64 = 0b01 << (u64::BITS - 2);
|
||||
|
||||
let mut output = BitWriter::new();
|
||||
|
||||
let mut high = u64::MAX;
|
||||
let mut low = u64::MIN;
|
||||
let mut pending_bits = 0;
|
||||
|
||||
for symbol in input {
|
||||
let range = high - low;
|
||||
let p = model.get(symbol).expect("Invalid/Unsupported data");
|
||||
high = low + (range as f64 * p.1) as u64;
|
||||
low = low + (range as f64 * p.0) as u64;
|
||||
loop {
|
||||
if high < HALF {
|
||||
output.write(false);
|
||||
print!("0");
|
||||
while pending_bits > 0 {
|
||||
output.write(true);
|
||||
print!("1");
|
||||
pending_bits -= 1;
|
||||
}
|
||||
} else if low >= HALF {
|
||||
output.write(true);
|
||||
print!("1");
|
||||
while pending_bits > 0 {
|
||||
output.write(true);
|
||||
print!("0");
|
||||
pending_bits -= 1;
|
||||
}
|
||||
} else if low >= LOW_CONVERGE && high < HIGH_CONVERGE {
|
||||
println!("BET");
|
||||
pending_bits += 1;
|
||||
low <<= 1;
|
||||
low &= HALF - 1;
|
||||
high <<= 1;
|
||||
high &= HALF + 1;
|
||||
continue;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
low <<= 1;
|
||||
high <<= 1;
|
||||
high |= 1;
|
||||
}
|
||||
}
|
||||
println!("");
|
||||
return output.flush();
|
||||
}
|
||||
|
||||
fn decode(input: &[u8], model: &Model) -> Vec<u8> {
|
||||
let mut high = 1.0;
|
||||
let mut low = 0.0;
|
||||
let mut output = vec![];
|
||||
for bit in BitReader::new(input) {
|
||||
let diff = high - low;
|
||||
if bit {
|
||||
//print!("1");
|
||||
low = low + (diff / 2.0);
|
||||
} else {
|
||||
high = high - (diff / 2.0);
|
||||
//print!("0");
|
||||
}
|
||||
if let Some(symbol) = get_symbol(model, low, high) {
|
||||
//println!("\nGot sym: {} from [{}, {})", symbol as char, low, high);
|
||||
output.push(symbol);
|
||||
let (slow, shigh) = model.get(&symbol).unwrap();
|
||||
let symdiff = *shigh - *slow;
|
||||
high = (high - *slow) / symdiff;
|
||||
low = (low - *slow) / symdiff;
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
fn main() {
|
||||
let data = b"hello world-";
|
||||
println!("MODEL:");
|
||||
let model: Model = make_model(ENGLISH);
|
||||
let data = b"
|
||||
I'd just like to interject for a moment. What you're refering to as Linux, is in fact, GNU/Linux, or as I've re
|
||||
aken to calling it, GNU plus Linux. Linux is not an operating system unto itself, but rather another free compo
|
||||
a fully functioning GNU system made useful by the GNU corelibs, shell utilities and vital system components com
|
||||
a full OS as defined by POSIX.
|
||||
|
||||
Many computer users run a modified version of the GNU system every day, without realizing it. Through a peculia
|
||||
f events, the version of GNU which is widely used today is often called Linux, and many of its users are not aw
|
||||
it is basically the GNU system, developed by the GNU Project.
|
||||
|
||||
There really is a Linux, and these people are using it, but it is just a part of the system they use. Linux is
|
||||
el: the program in the system that allocates the machine's resources to the other programs that you run. The ke
|
||||
an essential part of an operating system, but useless by itself; it can only function in the context of a compl
|
||||
ating system. Linux is normally used in combination with the GNU operating system: the whole system is basicall
|
||||
th Linux added, or GNU/Linux. All the so-called Linux distributions are really distributions of GNU/Linux!
|
||||
";
|
||||
type CodeValue = u32;
|
||||
println!("compressing...");
|
||||
let model: ModelA<CodeValue> = ModelA::default();
|
||||
model.print_metrics();
|
||||
println!("");
|
||||
|
||||
let _enc = encode(data, &model);
|
||||
let _dec = decode(&_enc, &model);
|
||||
|
||||
println!("{}", String::from_utf8(_dec).unwrap());
|
||||
let enc = model.compress(data);
|
||||
//println!("{}", enc.len());
|
||||
println!("ModelA compressed to {} bytes", enc.len());
|
||||
println!(
|
||||
"Compression Ratio: {}",
|
||||
data.len() as f64 / _enc.len() as f64
|
||||
data.len() as f64 / enc.len() as f64
|
||||
);
|
||||
//println!("--------- Compressed data ---------\n{}", dump_hex(&enc));
|
||||
println!("");
|
||||
|
||||
println!("decompressing...");
|
||||
let model: ModelA<CodeValue> = ModelA::default();
|
||||
let dec = model.decompress(&enc).unwrap();
|
||||
println!("{}", String::from_utf8_lossy(&dec));
|
||||
}
|
||||
|
||||
80
src/model.rs
80
src/model.rs
@ -1,5 +1,6 @@
|
||||
use std::collections::HashMap;
|
||||
pub type Model = HashMap<u8, (f64, f64)>;
|
||||
use crate::bit_buffer::{BitReader, BitWriter};
|
||||
|
||||
pub fn get_symbol(model: &Model, low: f64, high: f64) -> Option<u8> {
|
||||
for (symbol, (start, end)) in model {
|
||||
@ -51,3 +52,82 @@ pub const ENGLISH: &[(u8, f64)] = &[
|
||||
(b' ', 0.01),
|
||||
(b'-', 0.02),
|
||||
];
|
||||
|
||||
fn encode(input: &[u8], model: &Model) -> Vec<u8> {
|
||||
const HALF: u64 = 1 << (u64::BITS - 1);
|
||||
const LOW_CONVERGE: u64 = 0b10 << (u64::BITS - 2);
|
||||
const HIGH_CONVERGE: u64 = 0b01 << (u64::BITS - 2);
|
||||
|
||||
let mut output = BitWriter::new();
|
||||
|
||||
let mut high = u64::MAX;
|
||||
let mut low = u64::MIN;
|
||||
let mut pending_bits = 0;
|
||||
|
||||
for symbol in input {
|
||||
let range = high - low;
|
||||
let p = model.get(symbol).expect("Invalid/Unsupported data");
|
||||
high = low + (range as f64 * p.1) as u64;
|
||||
low = low + (range as f64 * p.0) as u64;
|
||||
loop {
|
||||
if high < HALF {
|
||||
output.write(false);
|
||||
print!("0");
|
||||
while pending_bits > 0 {
|
||||
output.write(true);
|
||||
print!("1");
|
||||
pending_bits -= 1;
|
||||
}
|
||||
} else if low >= HALF {
|
||||
output.write(true);
|
||||
print!("1");
|
||||
while pending_bits > 0 {
|
||||
output.write(true);
|
||||
print!("0");
|
||||
pending_bits -= 1;
|
||||
}
|
||||
} else if low >= LOW_CONVERGE && high < HIGH_CONVERGE {
|
||||
println!("BET");
|
||||
pending_bits += 1;
|
||||
low <<= 1;
|
||||
low &= HALF - 1;
|
||||
high <<= 1;
|
||||
high &= HALF + 1;
|
||||
continue;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
low <<= 1;
|
||||
high <<= 1;
|
||||
high |= 1;
|
||||
}
|
||||
}
|
||||
println!("");
|
||||
return output.flush();
|
||||
}
|
||||
|
||||
fn decode(input: &[u8], model: &Model) -> Vec<u8> {
|
||||
let mut high = 1.0;
|
||||
let mut low = 0.0;
|
||||
let mut output = vec![];
|
||||
for bit in BitReader::new(input) {
|
||||
let diff = high - low;
|
||||
if bit {
|
||||
//print!("1");
|
||||
low = low + (diff / 2.0);
|
||||
} else {
|
||||
high = high - (diff / 2.0);
|
||||
//print!("0");
|
||||
}
|
||||
if let Some(symbol) = get_symbol(model, low, high) {
|
||||
//println!("\nGot sym: {} from [{}, {})", symbol as char, low, high);
|
||||
output.push(symbol);
|
||||
let (slow, shigh) = model.get(&symbol).unwrap();
|
||||
let symdiff = *shigh - *slow;
|
||||
high = (high - *slow) / symdiff;
|
||||
low = (low - *slow) / symdiff;
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user