Add trait Model
This commit is contained in:
parent
3fab061972
commit
7cbaef1194
@ -1,3 +1,4 @@
|
|||||||
|
use sac::model::Model;
|
||||||
use sac::modelA::ModelA;
|
use sac::modelA::ModelA;
|
||||||
|
|
||||||
const DATA: &[u8] = b"
|
const DATA: &[u8] = b"
|
||||||
|
|||||||
@ -95,8 +95,8 @@ impl<T> BitReader<T> {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::model::Metrics;
|
||||||
use crate::modelA::tests::COMPRESSED_BYTES;
|
use crate::modelA::tests::COMPRESSED_BYTES;
|
||||||
use crate::modelA::Metrics;
|
|
||||||
|
|
||||||
struct InputBits<'a> {
|
struct InputBits<'a> {
|
||||||
input: &'a [u8],
|
input: &'a [u8],
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
// https://marknelson.us/posts/2014/10/19/data-compression-with-arithmetic-coding.html
|
// https://marknelson.us/posts/2014/10/19/data-compression-with-arithmetic-coding.html
|
||||||
|
pub mod model;
|
||||||
#[allow(non_snake_case)]
|
#[allow(non_snake_case)]
|
||||||
pub mod modelA;
|
pub mod modelA;
|
||||||
|
|
||||||
mod bit_buffer;
|
pub mod bit_buffer;
|
||||||
|
|||||||
@ -5,6 +5,7 @@ use std::{
|
|||||||
path::Path,
|
path::Path,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use sac::model::Model;
|
||||||
use sac::modelA::ModelA;
|
use sac::modelA::ModelA;
|
||||||
|
|
||||||
enum Mode {
|
enum Mode {
|
||||||
|
|||||||
298
src/model.rs
298
src/model.rs
@ -1,133 +1,207 @@
|
|||||||
use std::collections::HashMap;
|
use num::{FromPrimitive, Integer};
|
||||||
pub type Model = HashMap<u8, (f64, f64)>;
|
use std::{
|
||||||
|
io::{self, Read, Write},
|
||||||
|
ops::{BitAnd, Shl},
|
||||||
|
usize,
|
||||||
|
};
|
||||||
|
|
||||||
use crate::bit_buffer::{BitReader, BitWriter};
|
use crate::bit_buffer::{BitReader, BitWriter};
|
||||||
|
|
||||||
pub fn get_symbol(model: &Model, low: f64, high: f64) -> Option<u8> {
|
trait Precision {
|
||||||
for (symbol, (start, end)) in model {
|
const PRECISION: usize;
|
||||||
if low >= *start && high < *end {
|
}
|
||||||
return Some(*symbol);
|
macro_rules! unsignedImplDigits {
|
||||||
|
($($type: ident),*) => { $(
|
||||||
|
impl Precision for $type {
|
||||||
|
const PRECISION: usize = (std::mem::size_of::<$type>() * 8);
|
||||||
}
|
}
|
||||||
|
)* };
|
||||||
|
}
|
||||||
|
macro_rules! signedImplDigits {
|
||||||
|
($($type: ident),*) => { $(
|
||||||
|
impl Precision for $type {
|
||||||
|
const PRECISION: usize = (std::mem::size_of::<$type>() * 8) - 1;
|
||||||
|
}
|
||||||
|
)* };
|
||||||
|
}
|
||||||
|
unsignedImplDigits!(u32, u64);
|
||||||
|
signedImplDigits!(i32, i64, i128);
|
||||||
|
|
||||||
|
pub trait Metrics:
|
||||||
|
Integer + FromPrimitive + Copy + BitAnd<Output = Self> + Shl<Output = Self>
|
||||||
|
{
|
||||||
|
const PRECISION: usize;
|
||||||
|
|
||||||
|
const FREQUENCY_BITS: usize = (Self::PRECISION / 2) - 1;
|
||||||
|
const CODE_VALUE_BITS: usize = Self::FREQUENCY_BITS + 2;
|
||||||
|
const MAX_CODE: usize = if Self::CODE_VALUE_BITS == 64 {
|
||||||
|
u64::MAX as usize
|
||||||
|
} else {
|
||||||
|
(1 << Self::CODE_VALUE_BITS) - 1
|
||||||
|
};
|
||||||
|
const MAX_FREQ: usize = (1 << Self::FREQUENCY_BITS) - 1;
|
||||||
|
|
||||||
|
const ONE_FOURTH: usize = 1 << (Self::CODE_VALUE_BITS - 2);
|
||||||
|
const ONE_HALF: usize = 2 * Self::ONE_FOURTH;
|
||||||
|
const THREE_FOURTHS: usize = 3 * Self::ONE_FOURTH;
|
||||||
|
|
||||||
|
fn print_metrics() {
|
||||||
|
println!("--------- Metrics ---------");
|
||||||
|
println!(" PRECISION: {}", Self::PRECISION);
|
||||||
|
println!(" FREQUENCY_BITS: {}", Self::FREQUENCY_BITS);
|
||||||
|
println!("CODE_VALUE_BITS: {}", Self::CODE_VALUE_BITS);
|
||||||
|
println!(" MAX_CODE: {}", Self::MAX_CODE);
|
||||||
|
println!(" MAX_FREQ: {}", Self::MAX_FREQ);
|
||||||
|
println!(" ONE_FOURTH: {}", Self::ONE_FOURTH);
|
||||||
|
println!(" ONE_HALF: {}", Self::ONE_HALF);
|
||||||
|
println!(" THREE_FOURTHS: {}", Self::THREE_FOURTHS);
|
||||||
}
|
}
|
||||||
return None;
|
}
|
||||||
|
impl<
|
||||||
|
T: Precision + Integer + FromPrimitive + Copy + BitAnd<Output = Self> + Shl<Output = Self>,
|
||||||
|
> Metrics for T
|
||||||
|
{
|
||||||
|
const PRECISION: usize = T::PRECISION;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn make_model(probabilities: &[(u8, f64)]) -> Model {
|
#[derive(Debug)]
|
||||||
let mut model = HashMap::new();
|
pub struct Prob<T> {
|
||||||
let mut end: f64 = 0.0;
|
pub low: T,
|
||||||
for (symbol, probability) in probabilities {
|
pub high: T,
|
||||||
let start: f64 = end;
|
pub max_code: T,
|
||||||
end = start + probability;
|
|
||||||
model.insert(*symbol, (start, end));
|
|
||||||
println!("{}: [{}, {})", *symbol as char, start, end);
|
|
||||||
}
|
|
||||||
return model;
|
|
||||||
}
|
}
|
||||||
pub const ENGLISH: &[(u8, f64)] = &[
|
|
||||||
(b'a', 0.08),
|
|
||||||
(b'b', 0.01),
|
|
||||||
(b'c', 0.02),
|
|
||||||
(b'd', 0.04),
|
|
||||||
(b'e', 0.12),
|
|
||||||
(b'f', 0.02),
|
|
||||||
(b'g', 0.02),
|
|
||||||
(b'h', 0.06),
|
|
||||||
(b'i', 0.07),
|
|
||||||
(b'j', 0.01),
|
|
||||||
(b'k', 0.01),
|
|
||||||
(b'l', 0.04),
|
|
||||||
(b'm', 0.02),
|
|
||||||
(b'n', 0.06),
|
|
||||||
(b'o', 0.07),
|
|
||||||
(b'p', 0.01),
|
|
||||||
(b'q', 0.01),
|
|
||||||
(b'r', 0.06),
|
|
||||||
(b's', 0.06),
|
|
||||||
(b't', 0.09),
|
|
||||||
(b'u', 0.02),
|
|
||||||
(b'v', 0.01),
|
|
||||||
(b'w', 0.02),
|
|
||||||
(b'x', 0.01),
|
|
||||||
(b'y', 0.02),
|
|
||||||
(b'z', 0.01),
|
|
||||||
(b' ', 0.01),
|
|
||||||
(b'-', 0.02),
|
|
||||||
];
|
|
||||||
|
|
||||||
fn encode(input: &[u8], model: &Model) -> Vec<u8> {
|
pub trait Model<CodeWord: Metrics> {
|
||||||
const HALF: u64 = 1 << (u64::BITS - 1);
|
fn get_probability(&mut self, c: i32) -> Prob<CodeWord>;
|
||||||
const LOW_CONVERGE: u64 = 0b10 << (u64::BITS - 2);
|
fn get_char(&mut self, scaled_value: CodeWord) -> Option<(i32, Prob<CodeWord>)>;
|
||||||
const HIGH_CONVERGE: u64 = 0b01 << (u64::BITS - 2);
|
fn get_max_code(&self) -> CodeWord;
|
||||||
|
|
||||||
let mut output = BitWriter::new();
|
#[allow(non_snake_case)]
|
||||||
|
fn decompress<T: Read, O: Write, I: Into<BitReader<T>>>(
|
||||||
|
mut self,
|
||||||
|
input: I,
|
||||||
|
output: &mut O,
|
||||||
|
) -> io::Result<()>
|
||||||
|
where
|
||||||
|
Self: Sized,
|
||||||
|
{
|
||||||
|
let ONE: CodeWord = CodeWord::one();
|
||||||
|
let ZERO: CodeWord = CodeWord::zero();
|
||||||
|
let ONE_HALF: CodeWord = CodeWord::from_usize(CodeWord::ONE_HALF).unwrap();
|
||||||
|
let ONE_FORTH: CodeWord = CodeWord::from_usize(CodeWord::ONE_FOURTH).unwrap();
|
||||||
|
let THREE_FOURTHS: CodeWord = CodeWord::from_usize(CodeWord::THREE_FOURTHS).unwrap();
|
||||||
|
|
||||||
let mut high = u64::MAX;
|
let mut input: BitReader<T> = input
|
||||||
let mut low = u64::MIN;
|
.into()
|
||||||
let mut pending_bits = 0;
|
.with_repeat_bits(CodeWord::CODE_VALUE_BITS as u16);
|
||||||
|
|
||||||
for symbol in input {
|
let mut low: CodeWord = ZERO;
|
||||||
let range = high - low;
|
let mut high: CodeWord = CodeWord::from_usize(CodeWord::MAX_CODE).unwrap();
|
||||||
let p = model.get(symbol).expect("Invalid/Unsupported data");
|
let mut value: CodeWord = ZERO;
|
||||||
high = low + (range as f64 * p.1) as u64;
|
|
||||||
low = low + (range as f64 * p.0) as u64;
|
for _ in 0..CodeWord::CODE_VALUE_BITS {
|
||||||
|
value = (value << CodeWord::one()) + if input.get_bit()? { ONE } else { ZERO };
|
||||||
|
}
|
||||||
loop {
|
loop {
|
||||||
if high < HALF {
|
let range: CodeWord = high - low + ONE;
|
||||||
output.write(false);
|
let scaled_value = ((value - low + ONE) * self.get_max_code() - ONE) / range;
|
||||||
print!("0");
|
let (c, p) = self.get_char(scaled_value).unwrap();
|
||||||
while pending_bits > 0 {
|
if c > 255 || c < 0 {
|
||||||
output.write(true);
|
|
||||||
print!("1");
|
|
||||||
pending_bits -= 1;
|
|
||||||
}
|
|
||||||
} else if low >= HALF {
|
|
||||||
output.write(true);
|
|
||||||
print!("1");
|
|
||||||
while pending_bits > 0 {
|
|
||||||
output.write(true);
|
|
||||||
print!("0");
|
|
||||||
pending_bits -= 1;
|
|
||||||
}
|
|
||||||
} else if low >= LOW_CONVERGE && high < HIGH_CONVERGE {
|
|
||||||
println!("BET");
|
|
||||||
pending_bits += 1;
|
|
||||||
low <<= 1;
|
|
||||||
low &= HALF - 1;
|
|
||||||
high <<= 1;
|
|
||||||
high &= HALF + 1;
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
low <<= 1;
|
output.write(&[c as u8])?;
|
||||||
high <<= 1;
|
high = low + (range * p.high) / p.max_code - ONE;
|
||||||
high |= 1;
|
low = low + (range * p.low) / p.max_code;
|
||||||
|
loop {
|
||||||
|
if high < ONE_HALF {
|
||||||
|
} else if low >= ONE_HALF {
|
||||||
|
value = value - ONE_HALF;
|
||||||
|
low = low - ONE_HALF;
|
||||||
|
high = high - ONE_HALF
|
||||||
|
} else if low >= ONE_FORTH && high < THREE_FOURTHS {
|
||||||
|
value = value - ONE_FORTH;
|
||||||
|
low = low - ONE_FORTH;
|
||||||
|
high = high - ONE_FORTH;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
low = low << ONE;
|
||||||
|
high = (high << ONE) + ONE;
|
||||||
|
value = (value << ONE) + if input.get_bit()? { ONE } else { ZERO };
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return Ok(());
|
||||||
}
|
}
|
||||||
println!("");
|
|
||||||
return output.flush();
|
|
||||||
}
|
|
||||||
|
|
||||||
fn decode(input: &[u8], model: &Model) -> Vec<u8> {
|
#[allow(non_snake_case)]
|
||||||
let mut high = 1.0;
|
fn compress<IN: Read, OUT: Write>(mut self, input: IN, output: &mut OUT) -> std::io::Result<()>
|
||||||
let mut low = 0.0;
|
where
|
||||||
let mut output = vec![];
|
Self: Sized,
|
||||||
for bit in BitReader::new(input) {
|
{
|
||||||
let diff = high - low;
|
let ONE: CodeWord = CodeWord::one();
|
||||||
if bit {
|
let ZERO: CodeWord = CodeWord::zero();
|
||||||
//print!("1");
|
let MAX_CODE: CodeWord = CodeWord::from_usize(CodeWord::MAX_CODE).unwrap();
|
||||||
low = low + (diff / 2.0);
|
let ONE_HALF: CodeWord = CodeWord::from_usize(CodeWord::ONE_HALF).unwrap();
|
||||||
|
let ONE_FORTH: CodeWord = CodeWord::from_usize(CodeWord::ONE_FOURTH).unwrap();
|
||||||
|
let THREE_FOURTHS: CodeWord = CodeWord::from_usize(CodeWord::THREE_FOURTHS).unwrap();
|
||||||
|
|
||||||
|
let mut output: BitWriter<OUT> = output.into();
|
||||||
|
|
||||||
|
let mut pending_bits: i32 = 0;
|
||||||
|
let mut low: CodeWord = ZERO;
|
||||||
|
let mut high: CodeWord = MAX_CODE;
|
||||||
|
|
||||||
|
let mut iter = input
|
||||||
|
.bytes()
|
||||||
|
.map(|r| r.map(|b| b as i32))
|
||||||
|
.chain([Ok(256_i32)]);
|
||||||
|
while let Some(Ok(mut c)) = iter.next() {
|
||||||
|
if c > 255 || c < 0 {
|
||||||
|
c = 256;
|
||||||
|
}
|
||||||
|
let p = self.get_probability(c);
|
||||||
|
let range: CodeWord = high - low + ONE;
|
||||||
|
high = low + (range * p.high / p.max_code) - ONE;
|
||||||
|
low = low + (range * p.low / p.max_code);
|
||||||
|
|
||||||
|
loop {
|
||||||
|
if high < ONE_HALF {
|
||||||
|
write_with_pending(false, &mut pending_bits, &mut output)?;
|
||||||
|
} else if low >= ONE_HALF {
|
||||||
|
write_with_pending(true, &mut pending_bits, &mut output)?;
|
||||||
|
} else if low >= ONE_FORTH && high < THREE_FOURTHS {
|
||||||
|
pending_bits += 1;
|
||||||
|
low = low - ONE_FORTH;
|
||||||
|
high = high - ONE_FORTH;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
high = ((high << ONE) + ONE) & MAX_CODE;
|
||||||
|
low = (low << ONE) & MAX_CODE;
|
||||||
|
}
|
||||||
|
if c == 256 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pending_bits += 1;
|
||||||
|
if low < ONE_FORTH {
|
||||||
|
write_with_pending(false, &mut pending_bits, &mut output)?;
|
||||||
} else {
|
} else {
|
||||||
high = high - (diff / 2.0);
|
write_with_pending(true, &mut pending_bits, &mut output)?;
|
||||||
//print!("0");
|
|
||||||
}
|
}
|
||||||
if let Some(symbol) = get_symbol(model, low, high) {
|
|
||||||
//println!("\nGot sym: {} from [{}, {})", symbol as char, low, high);
|
|
||||||
output.push(symbol);
|
|
||||||
let (slow, shigh) = model.get(&symbol).unwrap();
|
|
||||||
let symdiff = *shigh - *slow;
|
|
||||||
high = (high - *slow) / symdiff;
|
|
||||||
low = (low - *slow) / symdiff;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return output;
|
return output.flush();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fn write_with_pending<W: std::io::Write>(
|
||||||
|
bit: bool,
|
||||||
|
pending: &mut i32,
|
||||||
|
output: &mut BitWriter<W>,
|
||||||
|
) -> std::io::Result<()> {
|
||||||
|
output.write(bit)?;
|
||||||
|
for _ in 0..*pending {
|
||||||
|
output.write(!bit)?;
|
||||||
|
}
|
||||||
|
*pending = 0;
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
230
src/modelA.rs
230
src/modelA.rs
@ -1,81 +1,7 @@
|
|||||||
use std::{
|
use crate::model::{Metrics, Model, Prob};
|
||||||
fmt::Display,
|
|
||||||
io::{self, Read, Write},
|
|
||||||
ops::{BitAnd, Shl},
|
|
||||||
usize,
|
|
||||||
};
|
|
||||||
|
|
||||||
use num::{FromPrimitive, Integer};
|
pub struct ModelA<T> {
|
||||||
|
cumulative_frequency: [T; 258],
|
||||||
use crate::bit_buffer::{BitReader, BitWriter};
|
|
||||||
|
|
||||||
trait Digits {
|
|
||||||
const PRECISION: usize;
|
|
||||||
}
|
|
||||||
macro_rules! unsignedImplDigits {
|
|
||||||
($($type: ident),*) => { $(
|
|
||||||
impl Digits for $type {
|
|
||||||
const PRECISION: usize = (std::mem::size_of::<$type>() * 8);
|
|
||||||
}
|
|
||||||
)* };
|
|
||||||
}
|
|
||||||
macro_rules! signedImplDigits {
|
|
||||||
($($type: ident),*) => { $(
|
|
||||||
impl Digits for $type {
|
|
||||||
const PRECISION: usize = (std::mem::size_of::<$type>() * 8) - 1;
|
|
||||||
}
|
|
||||||
)* };
|
|
||||||
}
|
|
||||||
unsignedImplDigits!(u32, u64);
|
|
||||||
signedImplDigits!(i32, i64, i128);
|
|
||||||
|
|
||||||
pub trait Metrics:
|
|
||||||
Integer + FromPrimitive + Copy + BitAnd<Output = Self> + Shl<Output = Self>
|
|
||||||
{
|
|
||||||
const PRECISION: usize;
|
|
||||||
|
|
||||||
const FREQUENCY_BITS: usize = (Self::PRECISION / 2) - 1;
|
|
||||||
const CODE_VALUE_BITS: usize = Self::FREQUENCY_BITS + 2;
|
|
||||||
const MAX_CODE: usize = if Self::CODE_VALUE_BITS == 64 {
|
|
||||||
u64::MAX as usize
|
|
||||||
} else {
|
|
||||||
(1 << Self::CODE_VALUE_BITS) - 1
|
|
||||||
};
|
|
||||||
const MAX_FREQ: usize = (1 << Self::FREQUENCY_BITS) - 1;
|
|
||||||
|
|
||||||
const ONE_FOURTH: usize = 1 << (Self::CODE_VALUE_BITS - 2);
|
|
||||||
const ONE_HALF: usize = 2 * Self::ONE_FOURTH;
|
|
||||||
const THREE_FOURTHS: usize = 3 * Self::ONE_FOURTH;
|
|
||||||
|
|
||||||
fn print_metrics() {
|
|
||||||
println!("--------- Metrics ---------");
|
|
||||||
println!(" PRECISION: {}", Self::PRECISION);
|
|
||||||
println!(" FREQUENCY_BITS: {}", Self::FREQUENCY_BITS);
|
|
||||||
println!("CODE_VALUE_BITS: {}", Self::CODE_VALUE_BITS);
|
|
||||||
println!(" MAX_CODE: {}", Self::MAX_CODE);
|
|
||||||
println!(" MAX_FREQ: {}", Self::MAX_FREQ);
|
|
||||||
println!(" ONE_FOURTH: {}", Self::ONE_FOURTH);
|
|
||||||
println!(" ONE_HALF: {}", Self::ONE_HALF);
|
|
||||||
println!(" THREE_FOURTHS: {}", Self::THREE_FOURTHS);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl<T: Digits + Integer + FromPrimitive + Copy + BitAnd<Output = Self> + Shl<Output = Self>>
|
|
||||||
Metrics for T
|
|
||||||
{
|
|
||||||
const PRECISION: usize = T::PRECISION;
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
struct Prob<T> {
|
|
||||||
low: T,
|
|
||||||
high: T,
|
|
||||||
total: T,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
#[allow(non_camel_case_types)]
|
|
||||||
pub struct ModelA<CODE_VALUE> {
|
|
||||||
cumulative_frequency: [CODE_VALUE; 258],
|
|
||||||
m_frozen: bool,
|
m_frozen: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -93,38 +19,39 @@ impl<T: Metrics> Default for ModelA<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(non_snake_case)]
|
impl<T: Metrics> ModelA<T> {
|
||||||
#[allow(non_camel_case_types)]
|
|
||||||
impl<CODE_VALUE: Metrics + Display> ModelA<CODE_VALUE> {
|
|
||||||
pub fn print_metrics(&self) {
|
pub fn print_metrics(&self) {
|
||||||
CODE_VALUE::print_metrics();
|
T::print_metrics();
|
||||||
}
|
}
|
||||||
fn update(&mut self, c: i32) {
|
fn update(&mut self, c: i32) {
|
||||||
for i in (c as usize + 1)..258 {
|
for i in (c as usize + 1)..258 {
|
||||||
self.cumulative_frequency[i] = self.cumulative_frequency[i] + CODE_VALUE::one();
|
self.cumulative_frequency[i] = self.cumulative_frequency[i] + T::one();
|
||||||
}
|
}
|
||||||
if self.cumulative_frequency[257] >= CODE_VALUE::from_usize(CODE_VALUE::MAX_FREQ).unwrap() {
|
if self.cumulative_frequency[257] >= T::from_usize(T::MAX_FREQ).unwrap() {
|
||||||
self.m_frozen = true;
|
self.m_frozen = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn getProbability(&mut self, c: i32) -> Prob<CODE_VALUE> {
|
}
|
||||||
|
impl<T: Metrics> Model<T> for ModelA<T> {
|
||||||
|
fn get_probability(&mut self, c: i32) -> crate::model::Prob<T> {
|
||||||
let p = Prob {
|
let p = Prob {
|
||||||
low: self.cumulative_frequency[c as usize],
|
low: self.cumulative_frequency[c as usize],
|
||||||
high: self.cumulative_frequency[c as usize + 1],
|
high: self.cumulative_frequency[c as usize + 1],
|
||||||
total: self.cumulative_frequency[257],
|
max_code: self.cumulative_frequency[257],
|
||||||
};
|
};
|
||||||
if !self.m_frozen {
|
if !self.m_frozen {
|
||||||
self.update(c);
|
self.update(c);
|
||||||
}
|
}
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
fn getChar(&mut self, scaled_value: CODE_VALUE) -> Option<(i32, Prob<CODE_VALUE>)> {
|
|
||||||
|
fn get_char(&mut self, scaled_value: T) -> Option<(i32, crate::model::Prob<T>)> {
|
||||||
for i in 0..258 {
|
for i in 0..258 {
|
||||||
if scaled_value < self.cumulative_frequency[i + 1] {
|
if scaled_value < self.cumulative_frequency[i + 1] {
|
||||||
let p = Prob {
|
let p = Prob {
|
||||||
low: self.cumulative_frequency[i],
|
low: self.cumulative_frequency[i],
|
||||||
high: self.cumulative_frequency[i + 1],
|
high: self.cumulative_frequency[i + 1],
|
||||||
total: self.cumulative_frequency[257],
|
max_code: self.cumulative_frequency[257],
|
||||||
};
|
};
|
||||||
if !self.m_frozen {
|
if !self.m_frozen {
|
||||||
self.update(i as i32)
|
self.update(i as i32)
|
||||||
@ -134,135 +61,10 @@ impl<CODE_VALUE: Metrics + Display> ModelA<CODE_VALUE> {
|
|||||||
}
|
}
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
fn getCount(&self) -> CODE_VALUE {
|
|
||||||
|
fn get_max_code(&self) -> T {
|
||||||
self.cumulative_frequency[257]
|
self.cumulative_frequency[257]
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn decompress<T: io::Read, O: io::Write, I: Into<BitReader<T>>>(
|
|
||||||
mut self,
|
|
||||||
input: I,
|
|
||||||
output: &mut O,
|
|
||||||
) -> io::Result<()> {
|
|
||||||
let ONE: CODE_VALUE = CODE_VALUE::one();
|
|
||||||
let ZERO: CODE_VALUE = CODE_VALUE::zero();
|
|
||||||
let ONE_HALF: CODE_VALUE = CODE_VALUE::from_usize(CODE_VALUE::ONE_HALF).unwrap();
|
|
||||||
let ONE_FORTH: CODE_VALUE = CODE_VALUE::from_usize(CODE_VALUE::ONE_FOURTH).unwrap();
|
|
||||||
let THREE_FOURTHS: CODE_VALUE = CODE_VALUE::from_usize(CODE_VALUE::THREE_FOURTHS).unwrap();
|
|
||||||
|
|
||||||
let mut input: BitReader<T> = input
|
|
||||||
.into()
|
|
||||||
.with_repeat_bits(CODE_VALUE::CODE_VALUE_BITS as u16);
|
|
||||||
|
|
||||||
let mut low: CODE_VALUE = ZERO;
|
|
||||||
let mut high: CODE_VALUE = CODE_VALUE::from_usize(CODE_VALUE::MAX_CODE).unwrap();
|
|
||||||
let mut value: CODE_VALUE = ZERO;
|
|
||||||
|
|
||||||
for _ in 0..CODE_VALUE::CODE_VALUE_BITS {
|
|
||||||
value = (value << CODE_VALUE::one()) + if input.get_bit()? { ONE } else { ZERO };
|
|
||||||
}
|
|
||||||
loop {
|
|
||||||
let range: CODE_VALUE = high - low + ONE;
|
|
||||||
let scaled_value = ((value - low + ONE) * self.getCount() - ONE) / range;
|
|
||||||
let (c, p) = self.getChar(scaled_value).unwrap();
|
|
||||||
if c > 255 || c < 0 {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
output.write(&[c as u8])?;
|
|
||||||
high = low + (range * p.high) / p.total - ONE;
|
|
||||||
low = low + (range * p.low) / p.total;
|
|
||||||
loop {
|
|
||||||
if high < ONE_HALF {
|
|
||||||
} else if low >= ONE_HALF {
|
|
||||||
value = value - ONE_HALF;
|
|
||||||
low = low - ONE_HALF;
|
|
||||||
high = high - ONE_HALF
|
|
||||||
} else if low >= ONE_FORTH && high < THREE_FOURTHS {
|
|
||||||
value = value - ONE_FORTH;
|
|
||||||
low = low - ONE_FORTH;
|
|
||||||
high = high - ONE_FORTH;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
low = low << ONE;
|
|
||||||
high = (high << ONE) + ONE;
|
|
||||||
value = (value << ONE) + if input.get_bit()? { ONE } else { ZERO };
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn compress<IN: Read, OUT: Write>(
|
|
||||||
mut self,
|
|
||||||
input: IN,
|
|
||||||
output: &mut OUT,
|
|
||||||
) -> std::io::Result<()> {
|
|
||||||
let ONE: CODE_VALUE = CODE_VALUE::one();
|
|
||||||
let ZERO: CODE_VALUE = CODE_VALUE::zero();
|
|
||||||
let MAX_CODE: CODE_VALUE = CODE_VALUE::from_usize(CODE_VALUE::MAX_CODE).unwrap();
|
|
||||||
let ONE_HALF: CODE_VALUE = CODE_VALUE::from_usize(CODE_VALUE::ONE_HALF).unwrap();
|
|
||||||
let ONE_FORTH: CODE_VALUE = CODE_VALUE::from_usize(CODE_VALUE::ONE_FOURTH).unwrap();
|
|
||||||
let THREE_FOURTHS: CODE_VALUE = CODE_VALUE::from_usize(CODE_VALUE::THREE_FOURTHS).unwrap();
|
|
||||||
|
|
||||||
let mut output: BitWriter<OUT> = output.into();
|
|
||||||
|
|
||||||
let mut pending_bits: i32 = 0;
|
|
||||||
let mut low: CODE_VALUE = ZERO;
|
|
||||||
let mut high: CODE_VALUE = MAX_CODE;
|
|
||||||
|
|
||||||
let mut iter = input
|
|
||||||
.bytes()
|
|
||||||
.map(|r| r.map(|b| b as i32))
|
|
||||||
.chain([Ok(256_i32)]);
|
|
||||||
while let Some(Ok(mut c)) = iter.next() {
|
|
||||||
if c > 255 || c < 0 {
|
|
||||||
c = 256;
|
|
||||||
}
|
|
||||||
let p = self.getProbability(c);
|
|
||||||
let range: CODE_VALUE = high - low + ONE;
|
|
||||||
high = low + (range * p.high / p.total) - ONE;
|
|
||||||
low = low + (range * p.low / p.total);
|
|
||||||
|
|
||||||
loop {
|
|
||||||
if high < ONE_HALF {
|
|
||||||
Self::write_with_pending(false, &mut pending_bits, &mut output)?;
|
|
||||||
} else if low >= ONE_HALF {
|
|
||||||
Self::write_with_pending(true, &mut pending_bits, &mut output)?;
|
|
||||||
} else if low >= ONE_FORTH && high < THREE_FOURTHS {
|
|
||||||
pending_bits += 1;
|
|
||||||
low = low - ONE_FORTH;
|
|
||||||
high = high - ONE_FORTH;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
high = ((high << ONE) + ONE) & MAX_CODE;
|
|
||||||
low = (low << ONE) & MAX_CODE;
|
|
||||||
}
|
|
||||||
if c == 256 {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pending_bits += 1;
|
|
||||||
if low < ONE_FORTH {
|
|
||||||
Self::write_with_pending(false, &mut pending_bits, &mut output)?;
|
|
||||||
} else {
|
|
||||||
Self::write_with_pending(true, &mut pending_bits, &mut output)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
return output.flush();
|
|
||||||
}
|
|
||||||
|
|
||||||
fn write_with_pending<W: std::io::Write>(
|
|
||||||
bit: bool,
|
|
||||||
pending: &mut i32,
|
|
||||||
output: &mut BitWriter<W>,
|
|
||||||
) -> std::io::Result<()> {
|
|
||||||
output.write(bit)?;
|
|
||||||
for _ in 0..*pending {
|
|
||||||
output.write(!bit)?;
|
|
||||||
}
|
|
||||||
*pending = 0;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user