Compare commits
12 Commits
6da0e73c5e
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 7cbaef1194 | |||
| 3fab061972 | |||
| 96415385ef | |||
| 34d0f0bafa | |||
| b68ef65f31 | |||
| 5eac451458 | |||
| 8ea134e008 | |||
| 8bfe71a1af | |||
| b4da1bdc13 | |||
| 5a3bfa9618 | |||
| 91f1860ce5 | |||
| 0f52321cf5 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1 +1,2 @@
|
|||||||
/target
|
/target
|
||||||
|
Cargo.lock
|
||||||
|
|||||||
@@ -4,3 +4,4 @@ version = "0.1.0"
|
|||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
num = "0.4.3"
|
||||||
|
|||||||
46
examples/example.rs
Normal file
46
examples/example.rs
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
use sac::model::Model;
|
||||||
|
use sac::modelA::ModelA;
|
||||||
|
|
||||||
|
const DATA: &[u8] = b"
|
||||||
|
I'd just like to interject for a moment. What you're refering to as Linux, is in fact, GNU/Linux, or as I've re
|
||||||
|
aken to calling it, GNU plus Linux. Linux is not an operating system unto itself, but rather another free compo
|
||||||
|
a fully functioning GNU system made useful by the GNU corelibs, shell utilities and vital system components com
|
||||||
|
a full OS as defined by POSIX.
|
||||||
|
|
||||||
|
Many computer users run a modified version of the GNU system every day, without realizing it. Through a peculia
|
||||||
|
f events, the version of GNU which is widely used today is often called Linux, and many of its users are not aw
|
||||||
|
it is basically the GNU system, developed by the GNU Project.
|
||||||
|
|
||||||
|
There really is a Linux, and these people are using it, but it is just a part of the system they use. Linux is
|
||||||
|
el: the program in the system that allocates the machine's resources to the other programs that you run. The ke
|
||||||
|
an essential part of an operating system, but useless by itself; it can only function in the context of a compl
|
||||||
|
ating system. Linux is normally used in combination with the GNU operating system: the whole system is basicall
|
||||||
|
th Linux added, or GNU/Linux. All the so-called Linux distributions are really distributions of GNU/Linux!
|
||||||
|
";
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
type CodeValue = u32;
|
||||||
|
println!(
|
||||||
|
"Using model: ModelA<{}>",
|
||||||
|
std::any::type_name::<CodeValue>()
|
||||||
|
);
|
||||||
|
let model: ModelA<CodeValue> = ModelA::default();
|
||||||
|
model.print_metrics();
|
||||||
|
println!("");
|
||||||
|
|
||||||
|
let mut compressed = Vec::new();
|
||||||
|
println!("compressing...");
|
||||||
|
model.compress(&DATA[..], &mut compressed).unwrap();
|
||||||
|
println!("ModelA compressed to {} bytes", compressed.len());
|
||||||
|
println!(
|
||||||
|
"Compression Ratio: {}",
|
||||||
|
DATA.len() as f64 / compressed.len() as f64
|
||||||
|
);
|
||||||
|
println!("");
|
||||||
|
|
||||||
|
println!("decompressing...");
|
||||||
|
let mut decompressed = Vec::new();
|
||||||
|
let model: ModelA<CodeValue> = ModelA::default();
|
||||||
|
model.decompress(&compressed, &mut decompressed).unwrap();
|
||||||
|
println!("{}", String::from_utf8_lossy(&decompressed));
|
||||||
|
}
|
||||||
181
src/bit_buffer.rs
Normal file
181
src/bit_buffer.rs
Normal file
@@ -0,0 +1,181 @@
|
|||||||
|
use std::io::{self, Bytes, Cursor, Read, Write};
|
||||||
|
|
||||||
|
/// Packs individual bits into bytes, most-significant bit first, and forwards every completed
/// byte to the wrapped writer.
pub struct BitWriter<'a, W: ?Sized + Write> {
    /// Byte currently being assembled; positions not yet written are zero.
    bits: u8,
    /// Bit position (7 = most significant, 0 = least significant) the next bit will occupy.
    nextbit: usize,
    /// Destination for completed bytes.
    output: &'a mut W,
}

impl<'a, W: Write> From<&'a mut W> for BitWriter<'a, W> {
    fn from(value: &'a mut W) -> Self {
        BitWriter::new(value)
    }
}

impl<'a, W: Write> BitWriter<'a, W> {
    /// Creates a writer with an empty bit accumulator on top of `writer`.
    pub fn new(writer: &'a mut W) -> Self {
        BitWriter {
            bits: 0,
            nextbit: 7,
            output: writer,
        }
    }

    /// Appends one bit; once eight bits have accumulated the byte is written out.
    ///
    /// # Errors
    /// Returns any error reported by the underlying writer.
    pub fn write(&mut self, bit: bool) -> io::Result<()> {
        if bit {
            self.bits |= 1 << self.nextbit;
        }
        if self.nextbit == 0 {
            // `write_all` rather than `write`: a plain `write` may report that zero bytes were
            // written without failing, which would silently lose the byte.
            self.output.write_all(&[self.bits])?;
            self.bits = 0;
            self.nextbit = 7;
        } else {
            self.nextbit -= 1;
        }
        Ok(())
    }

    /// Writes out a partially filled final byte (padded with zero bits) and flushes the
    /// underlying writer.
    ///
    /// # Errors
    /// Returns any error reported by the underlying writer.
    pub fn flush(self) -> io::Result<()> {
        // A byte is pending whenever at least one bit has been accepted since the last full
        // byte, even if all of those bits were zero. Testing `bits != 0` would drop them.
        if self.nextbit != 7 {
            self.output.write_all(&[self.bits])?;
        }
        self.output.flush()
    }
}
|
||||||
|
|
||||||
|
/// Reads a byte stream one bit at a time, most-significant bit first.
pub struct BitReader<T> {
    /// Mask selecting the next bit of `bits`; zero means a new byte must be fetched.
    next: u8,
    /// Byte currently being consumed.
    bits: u8,
    /// Remaining budget of bits that may still be served once the input is exhausted.
    repeat_bits: i32,
    /// Underlying byte source.
    input: Bytes<T>,
}

impl<R: Read> From<R> for BitReader<R> {
    fn from(value: R) -> Self {
        Self::new(value.bytes())
    }
}

impl<T: AsRef<[u8]>> From<T> for BitReader<Cursor<T>> {
    fn from(value: T) -> Self {
        // Wrap the in-memory buffer in a `Cursor` and reuse the `Read`-based conversion.
        Cursor::new(value).into()
    }
}

impl<R: Read> BitReader<R> {
    /// Creates a reader over `value` with no repeat budget.
    pub fn new(value: Bytes<R>) -> Self {
        BitReader {
            input: value,
            bits: 0,
            next: 0,
            repeat_bits: 0,
        }
    }

    /// Returns the next bit of the stream.
    ///
    /// When the input has ended, the last byte read is replayed, consuming eight bits of the
    /// repeat budget per replayed byte.
    ///
    /// # Errors
    /// Propagates read errors from the input, and fails with "No more bits" once the input is
    /// exhausted and the repeat budget is used up.
    pub fn get_bit(&mut self) -> io::Result<bool> {
        if self.next == 0 {
            match self.input.next() {
                Some(byte) => self.bits = byte?,
                None if self.repeat_bits > 0 => self.repeat_bits -= 8,
                None => return Err(io::Error::other("No more bits")),
            }
            self.next = 0x80;
        }
        let mask = self.next;
        self.next >>= 1;
        Ok((self.bits & mask) != 0)
    }
}

impl<T> BitReader<T> {
    /// Sets how many bits may still be served after the input ends.
    pub fn with_repeat_bits(self, n_bits: u16) -> Self {
        BitReader {
            repeat_bits: i32::from(n_bits),
            ..self
        }
    }
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::Metrics;
    use crate::modelA::tests::COMPRESSED_BYTES;

    /// Independent bit reader used as the reference that `BitReader` is checked against.
    struct InputBits<'a> {
        /// Bytes not yet consumed.
        input: &'a [u8],
        /// Byte the bits are currently taken from.
        current_byte: u32,
        /// Mask of the bit returned most recently; `1` means the byte is used up.
        last_mask: u32,
        /// Bits that may still be produced after `input` runs dry.
        code_value_bits: i32,
    }

    /// Removes and returns the first element of a sequence.
    pub trait Poppable {
        fn pop(&mut self) -> Option<u8>;
    }

    impl Poppable for &[u8] {
        fn pop(&mut self) -> Option<u8> {
            let (&head, tail) = self.split_first()?;
            *self = tail;
            Some(head)
        }
    }

    impl<'a> InputBits<'a> {
        /// Builds a reference reader over `data` whose end-of-input budget is
        /// `T::CODE_VALUE_BITS`.
        pub fn new<T: Metrics>(data: &'a [u8]) -> Self {
            Self {
                code_value_bits: T::CODE_VALUE_BITS as i32,
                last_mask: 1,
                current_byte: 0,
                input: data,
            }
        }

        /// Returns the next bit, or `None` once both the data and the budget are exhausted.
        fn get_bit(&mut self) -> Option<bool> {
            if self.last_mask != 1 {
                self.last_mask >>= 1;
            } else {
                if let Some(byte) = self.input.pop() {
                    self.current_byte = u32::from(byte);
                } else if self.code_value_bits > 0 {
                    self.code_value_bits -= 8;
                } else {
                    return None;
                }
                self.last_mask = 0x80;
            }
            Some((self.current_byte & self.last_mask) != 0)
        }
    }

    #[test]
    fn bit_reader_test_i32() {
        bit_reader_test_type::<i32>();
    }

    #[test]
    fn bit_reader_test_u32() {
        bit_reader_test_type::<u32>();
    }

    #[test]
    fn bit_reader_test_i64() {
        bit_reader_test_type::<i64>();
    }

    #[test]
    fn bit_reader_test_u64() {
        bit_reader_test_type::<u64>();
    }

    #[test]
    fn bit_reader_test_i128() {
        bit_reader_test_type::<i128>();
    }

    /// Checks that `BitReader` yields exactly the same bit sequence as `InputBits` for the
    /// code-value width of `T`, and then reports an error instead of producing extra bits.
    fn bit_reader_test_type<T: Metrics>() {
        let mut reference = InputBits::new::<T>(&COMPRESSED_BYTES);
        let mut reader =
            BitReader::from(COMPRESSED_BYTES).with_repeat_bits(T::CODE_VALUE_BITS as u16);

        while let Some(expected) = reference.get_bit() {
            let actual = reader.get_bit().unwrap();
            assert_eq!(expected, actual);
        }
        let _ = reader.get_bit().expect_err("Extra bits");
    }
}
|
||||||
6
src/lib.rs
Normal file
6
src/lib.rs
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
// https://marknelson.us/posts/2014/10/19/data-compression-with-arithmetic-coding.html
|
||||||
|
pub mod model;
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub mod modelA;
|
||||||
|
|
||||||
|
pub mod bit_buffer;
|
||||||
133
src/main.rs
133
src/main.rs
@@ -1,99 +1,52 @@
|
|||||||
// https://marknelson.us/posts/2014/10/19/data-compression-with-arithmetic-coding.html
|
use std::{
|
||||||
use std::collections::HashMap;
|
env,
|
||||||
|
fs::File,
|
||||||
|
io::{self, BufReader, Read},
|
||||||
|
path::Path,
|
||||||
|
};
|
||||||
|
|
||||||
type Model = HashMap<u8, (f64, f64)>;
|
use sac::model::Model;
|
||||||
|
use sac::modelA::ModelA;
|
||||||
|
|
||||||
fn get_symbol(model: &Model, d: f64) -> Option<u8> {
|
enum Mode {
|
||||||
// Brute force
|
Compress,
|
||||||
for (symbol, (start, end)) in model {
|
Decompress,
|
||||||
if d >= *start && d < *end {
|
}
|
||||||
return Some(*symbol);
|
type CodeValue = u32;
|
||||||
}
|
|
||||||
}
|
fn get_file<FilePath: AsRef<Path>>(filepath: FilePath) -> io::Result<Box<dyn Read>> {
|
||||||
return None;
|
let f = File::open(&filepath)?;
|
||||||
|
Ok(Box::new(BufReader::new(f)))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode(data: &[u8], model: &Model) -> f64 {
|
fn main() -> io::Result<()> {
|
||||||
let mut high: f64 = 1.0;
|
let args: Vec<String> = env::args().collect();
|
||||||
let mut low: f64 = 0.0;
|
let name = &args[0];
|
||||||
for symbol in data {
|
|
||||||
let p = model.get(symbol).expect("Invalid/Unsupported data");
|
|
||||||
let range = high - low;
|
|
||||||
high = low + range * p.1;
|
|
||||||
low = low + range * p.0;
|
|
||||||
}
|
|
||||||
return low + (high - low) / 2.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn decode(message: f64, model: &Model) {
|
let (input, mode): (Box<dyn Read>, Mode) = match args.len() {
|
||||||
let mut high: f64 = 1.0;
|
2 if args[1].to_lowercase() == "compress" => (Box::new(io::stdin()), Mode::Compress),
|
||||||
let mut low: f64 = 0.0;
|
2 if args[1].to_lowercase() == "decompress" => (Box::new(io::stdin()), Mode::Decompress),
|
||||||
loop {
|
|
||||||
let range = high - low;
|
3 if args[1].to_lowercase() == "compress" => (get_file(&args[2])?, Mode::Compress),
|
||||||
let d = (message - low) / range;
|
3 if args[1].to_lowercase() == "decompress" => (get_file(&args[2])?, Mode::Decompress),
|
||||||
let c = match get_symbol(&model, d) {
|
|
||||||
Some(c) => c,
|
_ => {
|
||||||
None => {
|
eprintln!("Usage:");
|
||||||
println!("");
|
eprintln!("{name} compress file > output # compress file and save to output");
|
||||||
eprintln!("Decode error: d={d}");
|
eprintln!("{name} decompress file > output # decompress file and save to output");
|
||||||
return;
|
return Err(io::Error::other(format!(
|
||||||
|
"Invalid command `{}`",
|
||||||
|
args.join(" ")
|
||||||
|
)));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
if c == b'-' {
|
|
||||||
println!("");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
print!("{}", c as char);
|
|
||||||
let p = model.get(&c).expect("Decode error");
|
|
||||||
high = low + range * p.1;
|
|
||||||
low = low + range * p.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn make_model(probabilities: &[(u8, f64)]) -> Model {
|
let model: ModelA<CodeValue> = ModelA::default();
|
||||||
let mut model = HashMap::new();
|
let mut output = io::stdout().lock();
|
||||||
let mut end: f64 = 0.0;
|
match mode {
|
||||||
for (symbol, probability) in probabilities {
|
Mode::Compress => model.compress(input, &mut output)?,
|
||||||
let start: f64 = end;
|
Mode::Decompress => model.decompress(input, &mut output)?,
|
||||||
end = start + probability;
|
};
|
||||||
model.insert(*symbol, (start, end));
|
|
||||||
println!("{}: [{}, {})", *symbol as char, start, end);
|
Ok(())
|
||||||
}
|
|
||||||
return model;
|
|
||||||
}
|
|
||||||
const ENGLISH: &[(u8, f64)] = &[
|
|
||||||
(b'a', 0.08),
|
|
||||||
(b'b', 0.01),
|
|
||||||
(b'c', 0.02),
|
|
||||||
(b'd', 0.04),
|
|
||||||
(b'e', 0.12),
|
|
||||||
(b'f', 0.02),
|
|
||||||
(b'g', 0.02),
|
|
||||||
(b'h', 0.06),
|
|
||||||
(b'i', 0.07),
|
|
||||||
(b'j', 0.01),
|
|
||||||
(b'k', 0.01),
|
|
||||||
(b'l', 0.04),
|
|
||||||
(b'm', 0.02),
|
|
||||||
(b'n', 0.06),
|
|
||||||
(b'o', 0.07),
|
|
||||||
(b'p', 0.01),
|
|
||||||
(b'q', 0.01),
|
|
||||||
(b'r', 0.06),
|
|
||||||
(b's', 0.06),
|
|
||||||
(b't', 0.09),
|
|
||||||
(b'u', 0.02),
|
|
||||||
(b'v', 0.01),
|
|
||||||
(b'w', 0.02),
|
|
||||||
(b'x', 0.01),
|
|
||||||
(b'y', 0.02),
|
|
||||||
(b'z', 0.01),
|
|
||||||
(b' ', 0.01),
|
|
||||||
(b'-', 0.02),
|
|
||||||
];
|
|
||||||
fn main() {
|
|
||||||
let model: Model = make_model(ENGLISH);
|
|
||||||
let message = encode(b"hello world-", &model);
|
|
||||||
println!("{message}");
|
|
||||||
decode(message, &model);
|
|
||||||
}
|
}
|
||||||
|
|||||||
207
src/model.rs
Normal file
207
src/model.rs
Normal file
@@ -0,0 +1,207 @@
|
|||||||
|
use num::{FromPrimitive, Integer};
|
||||||
|
use std::{
|
||||||
|
io::{self, Read, Write},
|
||||||
|
ops::{BitAnd, Shl},
|
||||||
|
usize,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::bit_buffer::{BitReader, BitWriter};
|
||||||
|
|
||||||
|
/// Number of bits of an integer type that are available for magnitude (sign bit excluded).
trait Precision {
    const PRECISION: usize;
}

/// Implements [`Precision`] for every listed type as its bit width minus `$reserved` bits.
macro_rules! impl_precision {
    ($reserved:expr => $($type:ty),* $(,)?) => {
        $(
            impl Precision for $type {
                const PRECISION: usize = <$type>::BITS as usize - $reserved;
            }
        )*
    };
}

// Unsigned integers can use their full width.
impl_precision!(0 => u32, u64);
// Signed integers give up one bit to the sign.
impl_precision!(1 => i32, i64, i128);
|
||||||
|
|
||||||
|
pub trait Metrics:
|
||||||
|
Integer + FromPrimitive + Copy + BitAnd<Output = Self> + Shl<Output = Self>
|
||||||
|
{
|
||||||
|
const PRECISION: usize;
|
||||||
|
|
||||||
|
const FREQUENCY_BITS: usize = (Self::PRECISION / 2) - 1;
|
||||||
|
const CODE_VALUE_BITS: usize = Self::FREQUENCY_BITS + 2;
|
||||||
|
const MAX_CODE: usize = if Self::CODE_VALUE_BITS == 64 {
|
||||||
|
u64::MAX as usize
|
||||||
|
} else {
|
||||||
|
(1 << Self::CODE_VALUE_BITS) - 1
|
||||||
|
};
|
||||||
|
const MAX_FREQ: usize = (1 << Self::FREQUENCY_BITS) - 1;
|
||||||
|
|
||||||
|
const ONE_FOURTH: usize = 1 << (Self::CODE_VALUE_BITS - 2);
|
||||||
|
const ONE_HALF: usize = 2 * Self::ONE_FOURTH;
|
||||||
|
const THREE_FOURTHS: usize = 3 * Self::ONE_FOURTH;
|
||||||
|
|
||||||
|
fn print_metrics() {
|
||||||
|
println!("--------- Metrics ---------");
|
||||||
|
println!(" PRECISION: {}", Self::PRECISION);
|
||||||
|
println!(" FREQUENCY_BITS: {}", Self::FREQUENCY_BITS);
|
||||||
|
println!("CODE_VALUE_BITS: {}", Self::CODE_VALUE_BITS);
|
||||||
|
println!(" MAX_CODE: {}", Self::MAX_CODE);
|
||||||
|
println!(" MAX_FREQ: {}", Self::MAX_FREQ);
|
||||||
|
println!(" ONE_FOURTH: {}", Self::ONE_FOURTH);
|
||||||
|
println!(" ONE_HALF: {}", Self::ONE_HALF);
|
||||||
|
println!(" THREE_FOURTHS: {}", Self::THREE_FOURTHS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<
|
||||||
|
T: Precision + Integer + FromPrimitive + Copy + BitAnd<Output = Self> + Shl<Output = Self>,
|
||||||
|
> Metrics for T
|
||||||
|
{
|
||||||
|
const PRECISION: usize = T::PRECISION;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Interval assigned to a symbol, expressed in cumulative-frequency counts.
///
/// The coder narrows its current range to `[low / max_code, high / max_code)`.
#[derive(Debug)]
pub struct Prob<T> {
    /// Cumulative count at which the symbol's interval starts.
    pub low: T,
    /// Cumulative count at which the symbol's interval ends.
    pub high: T,
    /// Denominator that `low` and `high` are divided by.
    pub max_code: T,
}
|
||||||
|
|
||||||
|
pub trait Model<CodeWord: Metrics> {
|
||||||
|
fn get_probability(&mut self, c: i32) -> Prob<CodeWord>;
|
||||||
|
fn get_char(&mut self, scaled_value: CodeWord) -> Option<(i32, Prob<CodeWord>)>;
|
||||||
|
fn get_max_code(&self) -> CodeWord;
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
fn decompress<T: Read, O: Write, I: Into<BitReader<T>>>(
|
||||||
|
mut self,
|
||||||
|
input: I,
|
||||||
|
output: &mut O,
|
||||||
|
) -> io::Result<()>
|
||||||
|
where
|
||||||
|
Self: Sized,
|
||||||
|
{
|
||||||
|
let ONE: CodeWord = CodeWord::one();
|
||||||
|
let ZERO: CodeWord = CodeWord::zero();
|
||||||
|
let ONE_HALF: CodeWord = CodeWord::from_usize(CodeWord::ONE_HALF).unwrap();
|
||||||
|
let ONE_FORTH: CodeWord = CodeWord::from_usize(CodeWord::ONE_FOURTH).unwrap();
|
||||||
|
let THREE_FOURTHS: CodeWord = CodeWord::from_usize(CodeWord::THREE_FOURTHS).unwrap();
|
||||||
|
|
||||||
|
let mut input: BitReader<T> = input
|
||||||
|
.into()
|
||||||
|
.with_repeat_bits(CodeWord::CODE_VALUE_BITS as u16);
|
||||||
|
|
||||||
|
let mut low: CodeWord = ZERO;
|
||||||
|
let mut high: CodeWord = CodeWord::from_usize(CodeWord::MAX_CODE).unwrap();
|
||||||
|
let mut value: CodeWord = ZERO;
|
||||||
|
|
||||||
|
for _ in 0..CodeWord::CODE_VALUE_BITS {
|
||||||
|
value = (value << CodeWord::one()) + if input.get_bit()? { ONE } else { ZERO };
|
||||||
|
}
|
||||||
|
loop {
|
||||||
|
let range: CodeWord = high - low + ONE;
|
||||||
|
let scaled_value = ((value - low + ONE) * self.get_max_code() - ONE) / range;
|
||||||
|
let (c, p) = self.get_char(scaled_value).unwrap();
|
||||||
|
if c > 255 || c < 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
output.write(&[c as u8])?;
|
||||||
|
high = low + (range * p.high) / p.max_code - ONE;
|
||||||
|
low = low + (range * p.low) / p.max_code;
|
||||||
|
loop {
|
||||||
|
if high < ONE_HALF {
|
||||||
|
} else if low >= ONE_HALF {
|
||||||
|
value = value - ONE_HALF;
|
||||||
|
low = low - ONE_HALF;
|
||||||
|
high = high - ONE_HALF
|
||||||
|
} else if low >= ONE_FORTH && high < THREE_FOURTHS {
|
||||||
|
value = value - ONE_FORTH;
|
||||||
|
low = low - ONE_FORTH;
|
||||||
|
high = high - ONE_FORTH;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
low = low << ONE;
|
||||||
|
high = (high << ONE) + ONE;
|
||||||
|
value = (value << ONE) + if input.get_bit()? { ONE } else { ZERO };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
fn compress<IN: Read, OUT: Write>(mut self, input: IN, output: &mut OUT) -> std::io::Result<()>
|
||||||
|
where
|
||||||
|
Self: Sized,
|
||||||
|
{
|
||||||
|
let ONE: CodeWord = CodeWord::one();
|
||||||
|
let ZERO: CodeWord = CodeWord::zero();
|
||||||
|
let MAX_CODE: CodeWord = CodeWord::from_usize(CodeWord::MAX_CODE).unwrap();
|
||||||
|
let ONE_HALF: CodeWord = CodeWord::from_usize(CodeWord::ONE_HALF).unwrap();
|
||||||
|
let ONE_FORTH: CodeWord = CodeWord::from_usize(CodeWord::ONE_FOURTH).unwrap();
|
||||||
|
let THREE_FOURTHS: CodeWord = CodeWord::from_usize(CodeWord::THREE_FOURTHS).unwrap();
|
||||||
|
|
||||||
|
let mut output: BitWriter<OUT> = output.into();
|
||||||
|
|
||||||
|
let mut pending_bits: i32 = 0;
|
||||||
|
let mut low: CodeWord = ZERO;
|
||||||
|
let mut high: CodeWord = MAX_CODE;
|
||||||
|
|
||||||
|
let mut iter = input
|
||||||
|
.bytes()
|
||||||
|
.map(|r| r.map(|b| b as i32))
|
||||||
|
.chain([Ok(256_i32)]);
|
||||||
|
while let Some(Ok(mut c)) = iter.next() {
|
||||||
|
if c > 255 || c < 0 {
|
||||||
|
c = 256;
|
||||||
|
}
|
||||||
|
let p = self.get_probability(c);
|
||||||
|
let range: CodeWord = high - low + ONE;
|
||||||
|
high = low + (range * p.high / p.max_code) - ONE;
|
||||||
|
low = low + (range * p.low / p.max_code);
|
||||||
|
|
||||||
|
loop {
|
||||||
|
if high < ONE_HALF {
|
||||||
|
write_with_pending(false, &mut pending_bits, &mut output)?;
|
||||||
|
} else if low >= ONE_HALF {
|
||||||
|
write_with_pending(true, &mut pending_bits, &mut output)?;
|
||||||
|
} else if low >= ONE_FORTH && high < THREE_FOURTHS {
|
||||||
|
pending_bits += 1;
|
||||||
|
low = low - ONE_FORTH;
|
||||||
|
high = high - ONE_FORTH;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
high = ((high << ONE) + ONE) & MAX_CODE;
|
||||||
|
low = (low << ONE) & MAX_CODE;
|
||||||
|
}
|
||||||
|
if c == 256 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pending_bits += 1;
|
||||||
|
if low < ONE_FORTH {
|
||||||
|
write_with_pending(false, &mut pending_bits, &mut output)?;
|
||||||
|
} else {
|
||||||
|
write_with_pending(true, &mut pending_bits, &mut output)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
return output.flush();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fn write_with_pending<W: std::io::Write>(
|
||||||
|
bit: bool,
|
||||||
|
pending: &mut i32,
|
||||||
|
output: &mut BitWriter<W>,
|
||||||
|
) -> std::io::Result<()> {
|
||||||
|
output.write(bit)?;
|
||||||
|
for _ in 0..*pending {
|
||||||
|
output.write(!bit)?;
|
||||||
|
}
|
||||||
|
*pending = 0;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
99
src/modelA.rs
Normal file
99
src/modelA.rs
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
use crate::model::{Metrics, Model, Prob};
|
||||||
|
|
||||||
|
pub struct ModelA<T> {
|
||||||
|
cumulative_frequency: [T; 258],
|
||||||
|
m_frozen: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Metrics> Default for ModelA<T> {
|
||||||
|
fn default() -> Self {
|
||||||
|
let m_frozen = false;
|
||||||
|
let mut cumulative_frequency = [T::zero(); 258];
|
||||||
|
for i in 0..258 {
|
||||||
|
cumulative_frequency[i] = T::from_usize(i).unwrap();
|
||||||
|
}
|
||||||
|
Self {
|
||||||
|
cumulative_frequency,
|
||||||
|
m_frozen,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Metrics> ModelA<T> {
    /// Prints the coder constants derived for the code-word type `T`.
    pub fn print_metrics(&self) {
        <T as Metrics>::print_metrics();
    }

    /// Records one more occurrence of symbol `c` and freezes the model once the grand total
    /// reaches `T::MAX_FREQ`.
    fn update(&mut self, c: i32) {
        // Every running total above the symbol's own lower bound grows by one.
        let first_affected = c as usize + 1;
        for total in self.cumulative_frequency.iter_mut().skip(first_affected) {
            *total = *total + T::one();
        }

        let limit = T::from_usize(T::MAX_FREQ).unwrap();
        self.m_frozen |= self.cumulative_frequency[257] >= limit;
    }
}
|
||||||
|
impl<T: Metrics> Model<T> for ModelA<T> {
|
||||||
|
fn get_probability(&mut self, c: i32) -> crate::model::Prob<T> {
|
||||||
|
let p = Prob {
|
||||||
|
low: self.cumulative_frequency[c as usize],
|
||||||
|
high: self.cumulative_frequency[c as usize + 1],
|
||||||
|
max_code: self.cumulative_frequency[257],
|
||||||
|
};
|
||||||
|
if !self.m_frozen {
|
||||||
|
self.update(c);
|
||||||
|
}
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_char(&mut self, scaled_value: T) -> Option<(i32, crate::model::Prob<T>)> {
|
||||||
|
for i in 0..258 {
|
||||||
|
if scaled_value < self.cumulative_frequency[i + 1] {
|
||||||
|
let p = Prob {
|
||||||
|
low: self.cumulative_frequency[i],
|
||||||
|
high: self.cumulative_frequency[i + 1],
|
||||||
|
max_code: self.cumulative_frequency[257],
|
||||||
|
};
|
||||||
|
if !self.m_frozen {
|
||||||
|
self.update(i as i32)
|
||||||
|
}
|
||||||
|
return Some((i as i32, p));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_max_code(&self) -> T {
|
||||||
|
self.cumulative_frequency[257]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
pub mod tests {
    use super::*;

    /// Plain-text fixture that the compressed fixture below decodes to.
    pub const UNCOMPRESSED_BYTES: &[u8; 13] = b"hello world-\n";
    /// Compressed bytes taken from the output of the C++ version.
    pub const COMPRESSED_BYTES: [u8; 14] = [
        0x67, 0xfc, 0x3e, 0x4a, 0x9d, 0x03, 0x7f, 0x35, 0xf1, 0x08, 0xd8, 0xa6, 0xbc, 0xda,
    ];

    /// Compressing the plain-text fixture must reproduce the reference bytes exactly.
    #[test]
    fn compression_test() {
        let mut enc: Vec<u8> = Vec::new();
        let model: ModelA<i32> = ModelA::default();
        model.compress(&UNCOMPRESSED_BYTES[..], &mut enc).unwrap();

        assert_eq!(COMPRESSED_BYTES.len(), enc.len());
        enc.iter()
            .zip(COMPRESSED_BYTES.iter())
            .for_each(|(a, b)| assert_eq!(a, b));
    }

    /// Decompressing the reference bytes must reproduce the plain-text fixture exactly.
    #[test]
    fn decompression_test() {
        let mut dec: Vec<u8> = Vec::new();
        let model: ModelA<i32> = ModelA::default();
        model.decompress(&COMPRESSED_BYTES, &mut dec).unwrap();

        assert_eq!(UNCOMPRESSED_BYTES.len(), dec.len());
        dec.iter()
            .zip(UNCOMPRESSED_BYTES.iter())
            .for_each(|(a, b)| assert_eq!(a, b));
    }
}
|
||||||
Reference in New Issue
Block a user