Huffman encoding in Rust
Clash Royale CLAN TAG#URR8PPP
.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;
up vote
3
down vote
favorite
An implementation of Huffman encoding / decoding as a learning exercise. I'm still new to Rust, so any tips about making things more natural for the language are appreciated.
mod huff
use std::boxed::Box;
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::fmt::Write;
pub struct SymbolFreq
symbol: char,
frequency: u32,
fn make_frequency_table(input: &String) -> Vec<SymbolFreq> a.frequency.cmp(&b.frequency));
result
pub struct Node
left : Option<Box<Node>>,
right : Option<Box<Node>>,
symbol_freq : SymbolFreq,
impl Node
fn is_leaf(&self) -> bool
self.left.is_none() && self.right.is_none()
impl PartialEq for Node
fn eq(&self, other: &Self) -> bool
self.symbol_freq.frequency.eq(&other.symbol_freq.frequency)
impl Eq for Node
impl PartialOrd for Node
fn partial_cmp(&self, other: &Self) -> Option<Ordering>
other.symbol_freq.frequency.partial_cmp(&self.symbol_freq.frequency)
impl Ord for Node
fn cmp(&self, other: &Self) -> Ordering
other.symbol_freq.frequency.cmp(&self.symbol_freq.frequency)
fn make_heap_node(symbol_freq: SymbolFreq) -> Box<Node>
Box::new(Node left: None, right: None, symbol_freq: symbol_freq )
fn make_combined_heap_node(left: Box<Node>, right: Box<Node>) -> Box<Node>
let symbol_freq = SymbolFreq
symbol: 0 as char,
frequency: left.symbol_freq.frequency + right.symbol_freq.frequency
;
Box::new(Node left: Some(left), right: Some(right), symbol_freq: symbol_freq )
fn make_queue(input: Vec<SymbolFreq>) -> BinaryHeap<Box<Node>>
let mut freqs = BinaryHeap::with_capacity(input.len());
for e in input
freqs.push(make_heap_node(e));
freqs
fn make_tree(mut input: BinaryHeap<Box<Node>>) -> Option<Box<Node>>
if input.is_empty()
return None;
let root;
loop
assert!(!input.is_empty());
let l = input.pop().unwrap();
let r = match input.pop()
Some(x) => x,
None => root = l; break; ,
;
let n = make_combined_heap_node(l, r);
match input.is_empty()
true => root = n; break; ,
false => input.push(n),
assert!(input.is_empty());
Some(root)
fn make_encoding_table(input: &Option<Box<Node>>, mut encoding: Vec<bool>, output: &mut BTreeMap<char, Vec<bool>>)
let n = match input.as_ref()
Some(x) => x,
None => return,
;
if n.is_leaf()
if encoding.is_empty() // root is leaf! -> add extra bit
encoding.push(false);
assert!(output.insert(n.symbol_freq.symbol, encoding).is_none());
else
let mut l = encoding.clone();
l.push(false);
make_encoding_table(&n.left, l, output);
let mut r = encoding;
r.push(true);
make_encoding_table(&n.right, r, output);
fn make_encodings(input: &String) -> (Option<Box<Node>>, BTreeMap<char, Vec<bool>>)
let freqs = make_frequency_table(input);
debug::print_frequencies(&freqs);
let queue = make_queue(freqs);
debug::print_queue(&queue);
let root = make_tree(queue);
debug::print_tree(&root);
let mut encodings = BTreeMap::new();
make_encoding_table(&root, Vec::new(), &mut encodings);
(root, encodings)
fn do_encoding(input: &String, encodings: &BTreeMap<char, Vec<bool>>) -> Vec<bool>
let mut result = Vec::new();
for c in input.chars()
result.extend(encodings.get(&c).unwrap());
result
fn do_decoding(input: &Vec<bool>, root: &Option<Box<Node>>) -> Result<String, &'static str>
let mut result = String::new();
let mut i = 0usize;
while i != input.len()
let mut next = root;
loop
let node = match next.as_ref()
Some(x) => x,
None => return Err("do_decoding() failed: reached null node in tree."),
;
if node.is_leaf()
result.push(node.symbol_freq.symbol);
break;
if i == input.len()
return Err("do_decoding() failed: reached end of input partway through decoding char.");
next = match input[i]
false => &node.left,
true => &node.right,
;
i += 1;
if next == root // root is leaf! -> always consume at least one bit!
i += 1;
Ok(result)
pub type Tree = Option<Box<Node>>;
pub type EncodedData = Vec<bool>;
pub fn encode(input: &String) -> (Tree, EncodedData)
let (tree, encodings) = make_encodings(input);
debug::print_encodings(&encodings);
let encoded_data = do_encoding(input, &encodings);
debug::print_byte_comparison(input, &encoded_data);
(tree, encoded_data)
pub fn decode(input: &EncodedData, tree: &Tree) -> Result<String, &'static str>
do_decoding(input, tree)
mod debug
use super::*;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::fmt::Write;
pub fn print_frequencies(frequencies: &Vec<SymbolFreq>)
println!("frequencies:");
for e in frequencies.iter()
println!("t(, )", e.symbol, e.frequency);
pub fn print_queue(queue: &BinaryHeap<Box<Node>>)
println!("queue:");
for n in queue
println!("t(, )", n.symbol_freq.symbol, n.symbol_freq.frequency);
fn print_node(node: &Option<Box<Node>>, indent: u32)
let n = match node.as_ref()
Some(x) => x,
None => return,
;
for _ in 0..indent
print!("t");
println!("(, )", n.symbol_freq.symbol, n.symbol_freq.frequency);
print_node(&n.left, indent + 1);
print_node(&n.right, indent + 1);
pub fn print_tree(root: &Option<Box<Node>>)
println!("tree:");
print_node(&root, 0);
fn encoding_to_string(encoding: &Vec<bool>) -> String
let mut s = String::with_capacity(encoding.len());
for b in encoding
write!(&mut s, "", *b as u8).unwrap();
s
pub fn print_encodings(encodings: &BTreeMap<char, Vec<bool>>)
println!("encodings:");
for (k, v) in encodings
print!(": n", k, encoding_to_string(v));
pub fn print_byte_comparison(unencoded: &String, encoded: &Vec<bool>)
print!("data: n", encoding_to_string(&encoded));
print!("original: bytesnencoded: ~bytesn", unencoded.len(), encoded.len() / 8);
// debug
#[cfg(test)]
mod tests
use super::*;
#[test]
fn encode_decode_empty_input()
let input = "".to_string();
let (t, d) = encode(&input);
assert!(t.is_none());
assert!(d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn encode_decode_single_char_input()
let input = "aaaaaaa".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn encode_decode_two_char_input()
let input = "abaabbaa".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn encode_decode_three_char_input()
let input = " abaa bba a".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn encode_decode_utf8_input()
let input = "ð ñò å¿ ç‹ÂÂãÂÂÃ¥Â
¬ hi".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn decode_empty_encoded_data()
let input = "ð ñò å¿ ç‹ÂÂãÂÂÃ¥Â
¬ hi asdkfweiuryhfiusdf".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&Vec::new(), &t).unwrap();
assert!(o.is_empty());
#[test]
#[should_panic(expected = "reached end of input")]
fn decode_corrupt_encoded_data()
let input = "ð ñò å¿ ç‹ÂÂãÂÂÃ¥Â
¬ hi asdkfweiuryhfiusdf".to_string();
let (t, mut d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
d.pop();
d.pop();
let _ = decode(&d, &t).unwrap();
#[test]
#[should_panic(expected = "reached null node")]
fn decode_corrupt_tree()
let input = "ð ñò å¿ ç‹ÂÂãÂÂÃ¥Â
¬ hi asdkfweiuryhfiusdf".to_string();
let (mut t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
t.as_mut().unwrap().left = None;
let _ = decode(&d, &t).unwrap();
// tests
// huff
#[cfg(not(test))]
fn main()
let input = "this is an example for huffman encoding";
print!("input: n", input);
let (tree, data) = huff::encode(&input.to_string());
let output = huff::decode(&data, &tree).unwrap();
print!("output: n", output);
Things I wonder about:
- It seems like a lot of trait implementations are required to put something in a
BinaryHeap
... is there a simpler way? - The compiler claims
std::fmt::Write
is unused, but if I comment it out, it doesn't compile. - The version of Rust I have installed is kinda old (
rustc 1.3.0 (9a92aaf19 2015-09-15)
) as I've been without internet. Are there things that could be improved with a more modern version?
Any other feedback is welcome.
reinventing-the-wheel rust compression
add a comment |Â
up vote
3
down vote
favorite
An implementation of Huffman encoding / decoding as a learning exercise. I'm still new to Rust, so any tips about making things more natural for the language are appreciated.
mod huff
use std::boxed::Box;
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::fmt::Write;
pub struct SymbolFreq
symbol: char,
frequency: u32,
fn make_frequency_table(input: &String) -> Vec<SymbolFreq> a.frequency.cmp(&b.frequency));
result
pub struct Node
left : Option<Box<Node>>,
right : Option<Box<Node>>,
symbol_freq : SymbolFreq,
impl Node
fn is_leaf(&self) -> bool
self.left.is_none() && self.right.is_none()
impl PartialEq for Node
fn eq(&self, other: &Self) -> bool
self.symbol_freq.frequency.eq(&other.symbol_freq.frequency)
impl Eq for Node
impl PartialOrd for Node
fn partial_cmp(&self, other: &Self) -> Option<Ordering>
other.symbol_freq.frequency.partial_cmp(&self.symbol_freq.frequency)
impl Ord for Node
fn cmp(&self, other: &Self) -> Ordering
other.symbol_freq.frequency.cmp(&self.symbol_freq.frequency)
fn make_heap_node(symbol_freq: SymbolFreq) -> Box<Node>
Box::new(Node left: None, right: None, symbol_freq: symbol_freq )
fn make_combined_heap_node(left: Box<Node>, right: Box<Node>) -> Box<Node>
let symbol_freq = SymbolFreq
symbol: 0 as char,
frequency: left.symbol_freq.frequency + right.symbol_freq.frequency
;
Box::new(Node left: Some(left), right: Some(right), symbol_freq: symbol_freq )
fn make_queue(input: Vec<SymbolFreq>) -> BinaryHeap<Box<Node>>
let mut freqs = BinaryHeap::with_capacity(input.len());
for e in input
freqs.push(make_heap_node(e));
freqs
fn make_tree(mut input: BinaryHeap<Box<Node>>) -> Option<Box<Node>>
if input.is_empty()
return None;
let root;
loop
assert!(!input.is_empty());
let l = input.pop().unwrap();
let r = match input.pop()
Some(x) => x,
None => root = l; break; ,
;
let n = make_combined_heap_node(l, r);
match input.is_empty()
true => root = n; break; ,
false => input.push(n),
assert!(input.is_empty());
Some(root)
fn make_encoding_table(input: &Option<Box<Node>>, mut encoding: Vec<bool>, output: &mut BTreeMap<char, Vec<bool>>)
let n = match input.as_ref()
Some(x) => x,
None => return,
;
if n.is_leaf()
if encoding.is_empty() // root is leaf! -> add extra bit
encoding.push(false);
assert!(output.insert(n.symbol_freq.symbol, encoding).is_none());
else
let mut l = encoding.clone();
l.push(false);
make_encoding_table(&n.left, l, output);
let mut r = encoding;
r.push(true);
make_encoding_table(&n.right, r, output);
fn make_encodings(input: &String) -> (Option<Box<Node>>, BTreeMap<char, Vec<bool>>)
let freqs = make_frequency_table(input);
debug::print_frequencies(&freqs);
let queue = make_queue(freqs);
debug::print_queue(&queue);
let root = make_tree(queue);
debug::print_tree(&root);
let mut encodings = BTreeMap::new();
make_encoding_table(&root, Vec::new(), &mut encodings);
(root, encodings)
fn do_encoding(input: &String, encodings: &BTreeMap<char, Vec<bool>>) -> Vec<bool>
let mut result = Vec::new();
for c in input.chars()
result.extend(encodings.get(&c).unwrap());
result
fn do_decoding(input: &Vec<bool>, root: &Option<Box<Node>>) -> Result<String, &'static str>
let mut result = String::new();
let mut i = 0usize;
while i != input.len()
let mut next = root;
loop
let node = match next.as_ref()
Some(x) => x,
None => return Err("do_decoding() failed: reached null node in tree."),
;
if node.is_leaf()
result.push(node.symbol_freq.symbol);
break;
if i == input.len()
return Err("do_decoding() failed: reached end of input partway through decoding char.");
next = match input[i]
false => &node.left,
true => &node.right,
;
i += 1;
if next == root // root is leaf! -> always consume at least one bit!
i += 1;
Ok(result)
pub type Tree = Option<Box<Node>>;
pub type EncodedData = Vec<bool>;
pub fn encode(input: &String) -> (Tree, EncodedData)
let (tree, encodings) = make_encodings(input);
debug::print_encodings(&encodings);
let encoded_data = do_encoding(input, &encodings);
debug::print_byte_comparison(input, &encoded_data);
(tree, encoded_data)
pub fn decode(input: &EncodedData, tree: &Tree) -> Result<String, &'static str>
do_decoding(input, tree)
mod debug
use super::*;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::fmt::Write;
pub fn print_frequencies(frequencies: &Vec<SymbolFreq>)
println!("frequencies:");
for e in frequencies.iter()
println!("t(, )", e.symbol, e.frequency);
pub fn print_queue(queue: &BinaryHeap<Box<Node>>)
println!("queue:");
for n in queue
println!("t(, )", n.symbol_freq.symbol, n.symbol_freq.frequency);
fn print_node(node: &Option<Box<Node>>, indent: u32)
let n = match node.as_ref()
Some(x) => x,
None => return,
;
for _ in 0..indent
print!("t");
println!("(, )", n.symbol_freq.symbol, n.symbol_freq.frequency);
print_node(&n.left, indent + 1);
print_node(&n.right, indent + 1);
pub fn print_tree(root: &Option<Box<Node>>)
println!("tree:");
print_node(&root, 0);
fn encoding_to_string(encoding: &Vec<bool>) -> String
let mut s = String::with_capacity(encoding.len());
for b in encoding
write!(&mut s, "", *b as u8).unwrap();
s
pub fn print_encodings(encodings: &BTreeMap<char, Vec<bool>>)
println!("encodings:");
for (k, v) in encodings
print!(": n", k, encoding_to_string(v));
pub fn print_byte_comparison(unencoded: &String, encoded: &Vec<bool>)
print!("data: n", encoding_to_string(&encoded));
print!("original: bytesnencoded: ~bytesn", unencoded.len(), encoded.len() / 8);
// debug
#[cfg(test)]
mod tests
use super::*;
#[test]
fn encode_decode_empty_input()
let input = "".to_string();
let (t, d) = encode(&input);
assert!(t.is_none());
assert!(d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn encode_decode_single_char_input()
let input = "aaaaaaa".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn encode_decode_two_char_input()
let input = "abaabbaa".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn encode_decode_three_char_input()
let input = " abaa bba a".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn encode_decode_utf8_input()
let input = "ð ñò å¿ ç‹ÂÂãÂÂÃ¥Â
¬ hi".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn decode_empty_encoded_data()
let input = "ð ñò å¿ ç‹ÂÂãÂÂÃ¥Â
¬ hi asdkfweiuryhfiusdf".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&Vec::new(), &t).unwrap();
assert!(o.is_empty());
#[test]
#[should_panic(expected = "reached end of input")]
fn decode_corrupt_encoded_data()
let input = "ð ñò å¿ ç‹ÂÂãÂÂÃ¥Â
¬ hi asdkfweiuryhfiusdf".to_string();
let (t, mut d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
d.pop();
d.pop();
let _ = decode(&d, &t).unwrap();
#[test]
#[should_panic(expected = "reached null node")]
fn decode_corrupt_tree()
let input = "ð ñò å¿ ç‹ÂÂãÂÂÃ¥Â
¬ hi asdkfweiuryhfiusdf".to_string();
let (mut t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
t.as_mut().unwrap().left = None;
let _ = decode(&d, &t).unwrap();
// tests
// huff
#[cfg(not(test))]
fn main()
let input = "this is an example for huffman encoding";
print!("input: n", input);
let (tree, data) = huff::encode(&input.to_string());
let output = huff::decode(&data, &tree).unwrap();
print!("output: n", output);
Things I wonder about:
- It seems like a lot of trait implementations are required to put something in a
BinaryHeap
... is there a simpler way? - The compiler claims
std::fmt::Write
is unused, but if I comment it out, it doesn't compile. - The version of Rust I have installed is kinda old (
rustc 1.3.0 (9a92aaf19 2015-09-15)
) as I've been without internet. Are there things that could be improved with a more modern version?
Any other feedback is welcome.
reinventing-the-wheel rust compression
add a comment |Â
up vote
3
down vote
favorite
up vote
3
down vote
favorite
An implementation of Huffman encoding / decoding as a learning exercise. I'm still new to Rust, so any tips about making things more natural for the language are appreciated.
mod huff
use std::boxed::Box;
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::fmt::Write;
pub struct SymbolFreq
symbol: char,
frequency: u32,
fn make_frequency_table(input: &String) -> Vec<SymbolFreq> a.frequency.cmp(&b.frequency));
result
pub struct Node
left : Option<Box<Node>>,
right : Option<Box<Node>>,
symbol_freq : SymbolFreq,
impl Node
fn is_leaf(&self) -> bool
self.left.is_none() && self.right.is_none()
impl PartialEq for Node
fn eq(&self, other: &Self) -> bool
self.symbol_freq.frequency.eq(&other.symbol_freq.frequency)
impl Eq for Node
impl PartialOrd for Node
fn partial_cmp(&self, other: &Self) -> Option<Ordering>
other.symbol_freq.frequency.partial_cmp(&self.symbol_freq.frequency)
impl Ord for Node
fn cmp(&self, other: &Self) -> Ordering
other.symbol_freq.frequency.cmp(&self.symbol_freq.frequency)
fn make_heap_node(symbol_freq: SymbolFreq) -> Box<Node>
Box::new(Node left: None, right: None, symbol_freq: symbol_freq )
fn make_combined_heap_node(left: Box<Node>, right: Box<Node>) -> Box<Node>
let symbol_freq = SymbolFreq
symbol: 0 as char,
frequency: left.symbol_freq.frequency + right.symbol_freq.frequency
;
Box::new(Node left: Some(left), right: Some(right), symbol_freq: symbol_freq )
fn make_queue(input: Vec<SymbolFreq>) -> BinaryHeap<Box<Node>>
let mut freqs = BinaryHeap::with_capacity(input.len());
for e in input
freqs.push(make_heap_node(e));
freqs
fn make_tree(mut input: BinaryHeap<Box<Node>>) -> Option<Box<Node>>
if input.is_empty()
return None;
let root;
loop
assert!(!input.is_empty());
let l = input.pop().unwrap();
let r = match input.pop()
Some(x) => x,
None => root = l; break; ,
;
let n = make_combined_heap_node(l, r);
match input.is_empty()
true => root = n; break; ,
false => input.push(n),
assert!(input.is_empty());
Some(root)
fn make_encoding_table(input: &Option<Box<Node>>, mut encoding: Vec<bool>, output: &mut BTreeMap<char, Vec<bool>>)
let n = match input.as_ref()
Some(x) => x,
None => return,
;
if n.is_leaf()
if encoding.is_empty() // root is leaf! -> add extra bit
encoding.push(false);
assert!(output.insert(n.symbol_freq.symbol, encoding).is_none());
else
let mut l = encoding.clone();
l.push(false);
make_encoding_table(&n.left, l, output);
let mut r = encoding;
r.push(true);
make_encoding_table(&n.right, r, output);
fn make_encodings(input: &String) -> (Option<Box<Node>>, BTreeMap<char, Vec<bool>>)
let freqs = make_frequency_table(input);
debug::print_frequencies(&freqs);
let queue = make_queue(freqs);
debug::print_queue(&queue);
let root = make_tree(queue);
debug::print_tree(&root);
let mut encodings = BTreeMap::new();
make_encoding_table(&root, Vec::new(), &mut encodings);
(root, encodings)
fn do_encoding(input: &String, encodings: &BTreeMap<char, Vec<bool>>) -> Vec<bool>
let mut result = Vec::new();
for c in input.chars()
result.extend(encodings.get(&c).unwrap());
result
fn do_decoding(input: &Vec<bool>, root: &Option<Box<Node>>) -> Result<String, &'static str>
let mut result = String::new();
let mut i = 0usize;
while i != input.len()
let mut next = root;
loop
let node = match next.as_ref()
Some(x) => x,
None => return Err("do_decoding() failed: reached null node in tree."),
;
if node.is_leaf()
result.push(node.symbol_freq.symbol);
break;
if i == input.len()
return Err("do_decoding() failed: reached end of input partway through decoding char.");
next = match input[i]
false => &node.left,
true => &node.right,
;
i += 1;
if next == root // root is leaf! -> always consume at least one bit!
i += 1;
Ok(result)
pub type Tree = Option<Box<Node>>;
pub type EncodedData = Vec<bool>;
pub fn encode(input: &String) -> (Tree, EncodedData)
let (tree, encodings) = make_encodings(input);
debug::print_encodings(&encodings);
let encoded_data = do_encoding(input, &encodings);
debug::print_byte_comparison(input, &encoded_data);
(tree, encoded_data)
pub fn decode(input: &EncodedData, tree: &Tree) -> Result<String, &'static str>
do_decoding(input, tree)
mod debug
use super::*;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::fmt::Write;
pub fn print_frequencies(frequencies: &Vec<SymbolFreq>)
println!("frequencies:");
for e in frequencies.iter()
println!("t(, )", e.symbol, e.frequency);
pub fn print_queue(queue: &BinaryHeap<Box<Node>>)
println!("queue:");
for n in queue
println!("t(, )", n.symbol_freq.symbol, n.symbol_freq.frequency);
fn print_node(node: &Option<Box<Node>>, indent: u32)
let n = match node.as_ref()
Some(x) => x,
None => return,
;
for _ in 0..indent
print!("t");
println!("(, )", n.symbol_freq.symbol, n.symbol_freq.frequency);
print_node(&n.left, indent + 1);
print_node(&n.right, indent + 1);
pub fn print_tree(root: &Option<Box<Node>>)
println!("tree:");
print_node(&root, 0);
fn encoding_to_string(encoding: &Vec<bool>) -> String
let mut s = String::with_capacity(encoding.len());
for b in encoding
write!(&mut s, "", *b as u8).unwrap();
s
pub fn print_encodings(encodings: &BTreeMap<char, Vec<bool>>)
println!("encodings:");
for (k, v) in encodings
print!(": n", k, encoding_to_string(v));
pub fn print_byte_comparison(unencoded: &String, encoded: &Vec<bool>)
print!("data: n", encoding_to_string(&encoded));
print!("original: bytesnencoded: ~bytesn", unencoded.len(), encoded.len() / 8);
// debug
#[cfg(test)]
mod tests
use super::*;
#[test]
fn encode_decode_empty_input()
let input = "".to_string();
let (t, d) = encode(&input);
assert!(t.is_none());
assert!(d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn encode_decode_single_char_input()
let input = "aaaaaaa".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn encode_decode_two_char_input()
let input = "abaabbaa".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn encode_decode_three_char_input()
let input = " abaa bba a".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn encode_decode_utf8_input()
let input = "ð ñò å¿ ç‹ÂÂãÂÂÃ¥Â
¬ hi".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn decode_empty_encoded_data()
let input = "ð ñò å¿ ç‹ÂÂãÂÂÃ¥Â
¬ hi asdkfweiuryhfiusdf".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&Vec::new(), &t).unwrap();
assert!(o.is_empty());
#[test]
#[should_panic(expected = "reached end of input")]
fn decode_corrupt_encoded_data()
let input = "ð ñò å¿ ç‹ÂÂãÂÂÃ¥Â
¬ hi asdkfweiuryhfiusdf".to_string();
let (t, mut d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
d.pop();
d.pop();
let _ = decode(&d, &t).unwrap();
#[test]
#[should_panic(expected = "reached null node")]
fn decode_corrupt_tree()
let input = "ð ñò å¿ ç‹ÂÂãÂÂÃ¥Â
¬ hi asdkfweiuryhfiusdf".to_string();
let (mut t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
t.as_mut().unwrap().left = None;
let _ = decode(&d, &t).unwrap();
// tests
// huff
#[cfg(not(test))]
fn main()
let input = "this is an example for huffman encoding";
print!("input: n", input);
let (tree, data) = huff::encode(&input.to_string());
let output = huff::decode(&data, &tree).unwrap();
print!("output: n", output);
Things I wonder about:
- It seems like a lot of trait implementations are required to put something in a
BinaryHeap
... is there a simpler way? - The compiler claims
std::fmt::Write
is unused, but if I comment it out, it doesn't compile. - The version of Rust I have installed is kinda old (
rustc 1.3.0 (9a92aaf19 2015-09-15)
) as I've been without internet. Are there things that could be improved with a more modern version?
Any other feedback is welcome.
reinventing-the-wheel rust compression
An implementation of Huffman encoding / decoding as a learning exercise. I'm still new to Rust, so any tips about making things more natural for the language are appreciated.
mod huff
use std::boxed::Box;
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::fmt::Write;
pub struct SymbolFreq
symbol: char,
frequency: u32,
fn make_frequency_table(input: &String) -> Vec<SymbolFreq> a.frequency.cmp(&b.frequency));
result
pub struct Node
left : Option<Box<Node>>,
right : Option<Box<Node>>,
symbol_freq : SymbolFreq,
impl Node
fn is_leaf(&self) -> bool
self.left.is_none() && self.right.is_none()
impl PartialEq for Node
fn eq(&self, other: &Self) -> bool
self.symbol_freq.frequency.eq(&other.symbol_freq.frequency)
impl Eq for Node
impl PartialOrd for Node
fn partial_cmp(&self, other: &Self) -> Option<Ordering>
other.symbol_freq.frequency.partial_cmp(&self.symbol_freq.frequency)
impl Ord for Node
fn cmp(&self, other: &Self) -> Ordering
other.symbol_freq.frequency.cmp(&self.symbol_freq.frequency)
fn make_heap_node(symbol_freq: SymbolFreq) -> Box<Node>
Box::new(Node left: None, right: None, symbol_freq: symbol_freq )
fn make_combined_heap_node(left: Box<Node>, right: Box<Node>) -> Box<Node>
let symbol_freq = SymbolFreq
symbol: 0 as char,
frequency: left.symbol_freq.frequency + right.symbol_freq.frequency
;
Box::new(Node left: Some(left), right: Some(right), symbol_freq: symbol_freq )
fn make_queue(input: Vec<SymbolFreq>) -> BinaryHeap<Box<Node>>
let mut freqs = BinaryHeap::with_capacity(input.len());
for e in input
freqs.push(make_heap_node(e));
freqs
fn make_tree(mut input: BinaryHeap<Box<Node>>) -> Option<Box<Node>>
if input.is_empty()
return None;
let root;
loop
assert!(!input.is_empty());
let l = input.pop().unwrap();
let r = match input.pop()
Some(x) => x,
None => root = l; break; ,
;
let n = make_combined_heap_node(l, r);
match input.is_empty()
true => root = n; break; ,
false => input.push(n),
assert!(input.is_empty());
Some(root)
fn make_encoding_table(input: &Option<Box<Node>>, mut encoding: Vec<bool>, output: &mut BTreeMap<char, Vec<bool>>)
let n = match input.as_ref()
Some(x) => x,
None => return,
;
if n.is_leaf()
if encoding.is_empty() // root is leaf! -> add extra bit
encoding.push(false);
assert!(output.insert(n.symbol_freq.symbol, encoding).is_none());
else
let mut l = encoding.clone();
l.push(false);
make_encoding_table(&n.left, l, output);
let mut r = encoding;
r.push(true);
make_encoding_table(&n.right, r, output);
fn make_encodings(input: &String) -> (Option<Box<Node>>, BTreeMap<char, Vec<bool>>)
let freqs = make_frequency_table(input);
debug::print_frequencies(&freqs);
let queue = make_queue(freqs);
debug::print_queue(&queue);
let root = make_tree(queue);
debug::print_tree(&root);
let mut encodings = BTreeMap::new();
make_encoding_table(&root, Vec::new(), &mut encodings);
(root, encodings)
fn do_encoding(input: &String, encodings: &BTreeMap<char, Vec<bool>>) -> Vec<bool>
let mut result = Vec::new();
for c in input.chars()
result.extend(encodings.get(&c).unwrap());
result
fn do_decoding(input: &Vec<bool>, root: &Option<Box<Node>>) -> Result<String, &'static str>
let mut result = String::new();
let mut i = 0usize;
while i != input.len()
let mut next = root;
loop
let node = match next.as_ref()
Some(x) => x,
None => return Err("do_decoding() failed: reached null node in tree."),
;
if node.is_leaf()
result.push(node.symbol_freq.symbol);
break;
if i == input.len()
return Err("do_decoding() failed: reached end of input partway through decoding char.");
next = match input[i]
false => &node.left,
true => &node.right,
;
i += 1;
if next == root // root is leaf! -> always consume at least one bit!
i += 1;
Ok(result)
pub type Tree = Option<Box<Node>>;
pub type EncodedData = Vec<bool>;
pub fn encode(input: &String) -> (Tree, EncodedData)
let (tree, encodings) = make_encodings(input);
debug::print_encodings(&encodings);
let encoded_data = do_encoding(input, &encodings);
debug::print_byte_comparison(input, &encoded_data);
(tree, encoded_data)
pub fn decode(input: &EncodedData, tree: &Tree) -> Result<String, &'static str>
do_decoding(input, tree)
mod debug
use super::*;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::fmt::Write;
pub fn print_frequencies(frequencies: &Vec<SymbolFreq>)
println!("frequencies:");
for e in frequencies.iter()
println!("t(, )", e.symbol, e.frequency);
pub fn print_queue(queue: &BinaryHeap<Box<Node>>)
println!("queue:");
for n in queue
println!("t(, )", n.symbol_freq.symbol, n.symbol_freq.frequency);
fn print_node(node: &Option<Box<Node>>, indent: u32)
let n = match node.as_ref()
Some(x) => x,
None => return,
;
for _ in 0..indent
print!("t");
println!("(, )", n.symbol_freq.symbol, n.symbol_freq.frequency);
print_node(&n.left, indent + 1);
print_node(&n.right, indent + 1);
pub fn print_tree(root: &Option<Box<Node>>)
println!("tree:");
print_node(&root, 0);
fn encoding_to_string(encoding: &Vec<bool>) -> String
let mut s = String::with_capacity(encoding.len());
for b in encoding
write!(&mut s, "", *b as u8).unwrap();
s
pub fn print_encodings(encodings: &BTreeMap<char, Vec<bool>>)
println!("encodings:");
for (k, v) in encodings
print!(": n", k, encoding_to_string(v));
pub fn print_byte_comparison(unencoded: &String, encoded: &Vec<bool>)
print!("data: n", encoding_to_string(&encoded));
print!("original: bytesnencoded: ~bytesn", unencoded.len(), encoded.len() / 8);
// debug
#[cfg(test)]
mod tests
use super::*;
#[test]
fn encode_decode_empty_input()
let input = "".to_string();
let (t, d) = encode(&input);
assert!(t.is_none());
assert!(d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn encode_decode_single_char_input()
let input = "aaaaaaa".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn encode_decode_two_char_input()
let input = "abaabbaa".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn encode_decode_three_char_input()
let input = " abaa bba a".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn encode_decode_utf8_input()
let input = "ð ñò å¿ ç‹ÂÂãÂÂÃ¥Â
¬ hi".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&d, &t).unwrap();
assert_eq!(o, input);
#[test]
fn decode_empty_encoded_data()
let input = "ð ñò å¿ ç‹ÂÂãÂÂÃ¥Â
¬ hi asdkfweiuryhfiusdf".to_string();
let (t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
let o = decode(&Vec::new(), &t).unwrap();
assert!(o.is_empty());
#[test]
#[should_panic(expected = "reached end of input")]
fn decode_corrupt_encoded_data()
let input = "ð ñò å¿ ç‹ÂÂãÂÂÃ¥Â
¬ hi asdkfweiuryhfiusdf".to_string();
let (t, mut d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
d.pop();
d.pop();
let _ = decode(&d, &t).unwrap();
#[test]
#[should_panic(expected = "reached null node")]
fn decode_corrupt_tree()
let input = "ð ñò å¿ ç‹ÂÂãÂÂÃ¥Â
¬ hi asdkfweiuryhfiusdf".to_string();
let (mut t, d) = encode(&input);
assert!(!t.is_none());
assert!(!d.is_empty());
t.as_mut().unwrap().left = None;
let _ = decode(&d, &t).unwrap();
// tests
// huff
#[cfg(not(test))]
fn main()
let input = "this is an example for huffman encoding";
print!("input: n", input);
let (tree, data) = huff::encode(&input.to_string());
let output = huff::decode(&data, &tree).unwrap();
print!("output: n", output);
Things I wonder about:
- It seems like a lot of trait implementations are required to put something in a
BinaryHeap
... is there a simpler way? - The compiler claims
std::fmt::Write
is unused, but if I comment it out, it doesn't compile. - The version of Rust I have installed is kinda old (
rustc 1.3.0 (9a92aaf19 2015-09-15)
) as I've been without internet. Are there things that could be improved with a more modern version?
Any other feedback is welcome.
reinventing-the-wheel rust compression
edited Mar 23 at 22:47
Shepmaster
6,5981018
6,5981018
asked Mar 19 at 16:08
user673679
1,042518
1,042518
add a comment |Â
add a comment |Â
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f189943%2fhuffman-encoding-in-rust%23new-answer', 'question_page');
);
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password