Huffman encoding in Rust

The name of the pictureThe name of the pictureThe name of the pictureClash Royale CLAN TAG#URR8PPP





.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;







up vote
3
down vote

favorite












An implementation of Huffman encoding / decoding as a learning exercise. I'm still new to Rust, so any tips about making things more natural for the language are appreciated.



mod huff 

use std::boxed::Box;
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::fmt::Write;

pub struct SymbolFreq
symbol: char,
frequency: u32,


fn make_frequency_table(input: &String) -> Vec<SymbolFreq> a.frequency.cmp(&b.frequency));

result


pub struct Node
left : Option<Box<Node>>,
right : Option<Box<Node>>,
symbol_freq : SymbolFreq,


impl Node
fn is_leaf(&self) -> bool
self.left.is_none() && self.right.is_none()



impl PartialEq for Node
fn eq(&self, other: &Self) -> bool
self.symbol_freq.frequency.eq(&other.symbol_freq.frequency)



impl Eq for Node

impl PartialOrd for Node
fn partial_cmp(&self, other: &Self) -> Option<Ordering>
other.symbol_freq.frequency.partial_cmp(&self.symbol_freq.frequency)



impl Ord for Node
fn cmp(&self, other: &Self) -> Ordering
other.symbol_freq.frequency.cmp(&self.symbol_freq.frequency)



fn make_heap_node(symbol_freq: SymbolFreq) -> Box<Node>
Box::new(Node left: None, right: None, symbol_freq: symbol_freq )


fn make_combined_heap_node(left: Box<Node>, right: Box<Node>) -> Box<Node>

let symbol_freq = SymbolFreq
symbol: 0 as char,
frequency: left.symbol_freq.frequency + right.symbol_freq.frequency
;

Box::new(Node left: Some(left), right: Some(right), symbol_freq: symbol_freq )


fn make_queue(input: Vec<SymbolFreq>) -> BinaryHeap<Box<Node>>

let mut freqs = BinaryHeap::with_capacity(input.len());

for e in input
freqs.push(make_heap_node(e));


freqs


fn make_tree(mut input: BinaryHeap<Box<Node>>) -> Option<Box<Node>>

if input.is_empty()
return None;


let root;

loop

assert!(!input.is_empty());

let l = input.pop().unwrap();

let r = match input.pop()
Some(x) => x,
None => root = l; break; ,
;

let n = make_combined_heap_node(l, r);

match input.is_empty()
true => root = n; break; ,
false => input.push(n),



assert!(input.is_empty());

Some(root)


fn make_encoding_table(input: &Option<Box<Node>>, mut encoding: Vec<bool>, output: &mut BTreeMap<char, Vec<bool>>)

let n = match input.as_ref()
Some(x) => x,
None => return,
;

if n.is_leaf()

if encoding.is_empty() // root is leaf! -> add extra bit
encoding.push(false);


assert!(output.insert(n.symbol_freq.symbol, encoding).is_none());

else

let mut l = encoding.clone();
l.push(false);
make_encoding_table(&n.left, l, output);

let mut r = encoding;
r.push(true);
make_encoding_table(&n.right, r, output);



fn make_encodings(input: &String) -> (Option<Box<Node>>, BTreeMap<char, Vec<bool>>)

let freqs = make_frequency_table(input);
debug::print_frequencies(&freqs);

let queue = make_queue(freqs);
debug::print_queue(&queue);

let root = make_tree(queue);
debug::print_tree(&root);

let mut encodings = BTreeMap::new();
make_encoding_table(&root, Vec::new(), &mut encodings);

(root, encodings)


fn do_encoding(input: &String, encodings: &BTreeMap<char, Vec<bool>>) -> Vec<bool>

let mut result = Vec::new();

for c in input.chars()
result.extend(encodings.get(&c).unwrap());


result


fn do_decoding(input: &Vec<bool>, root: &Option<Box<Node>>) -> Result<String, &'static str>

let mut result = String::new();
let mut i = 0usize;

while i != input.len()

let mut next = root;

loop

let node = match next.as_ref()
Some(x) => x,
None => return Err("do_decoding() failed: reached null node in tree."),
;

if node.is_leaf()
result.push(node.symbol_freq.symbol);
break;


if i == input.len()
return Err("do_decoding() failed: reached end of input partway through decoding char.");


next = match input[i]
false => &node.left,
true => &node.right,
;

i += 1;


if next == root // root is leaf! -> always consume at least one bit!
i += 1;



Ok(result)



pub type Tree = Option<Box<Node>>;
pub type EncodedData = Vec<bool>;

pub fn encode(input: &String) -> (Tree, EncodedData)

let (tree, encodings) = make_encodings(input);
debug::print_encodings(&encodings);

let encoded_data = do_encoding(input, &encodings);
debug::print_byte_comparison(input, &encoded_data);

(tree, encoded_data)


pub fn decode(input: &EncodedData, tree: &Tree) -> Result<String, &'static str>
do_decoding(input, tree)



mod debug

use super::*;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::fmt::Write;

pub fn print_frequencies(frequencies: &Vec<SymbolFreq>)
println!("frequencies:");
for e in frequencies.iter()
println!("t(, )", e.symbol, e.frequency);



pub fn print_queue(queue: &BinaryHeap<Box<Node>>)
println!("queue:");
for n in queue
println!("t(, )", n.symbol_freq.symbol, n.symbol_freq.frequency);



fn print_node(node: &Option<Box<Node>>, indent: u32)
let n = match node.as_ref()
Some(x) => x,
None => return,
;

for _ in 0..indent
print!("t");

println!("(, )", n.symbol_freq.symbol, n.symbol_freq.frequency);

print_node(&n.left, indent + 1);
print_node(&n.right, indent + 1);


pub fn print_tree(root: &Option<Box<Node>>)
println!("tree:");
print_node(&root, 0);


fn encoding_to_string(encoding: &Vec<bool>) -> String
let mut s = String::with_capacity(encoding.len());
for b in encoding
write!(&mut s, "", *b as u8).unwrap();

s


pub fn print_encodings(encodings: &BTreeMap<char, Vec<bool>>)
println!("encodings:");
for (k, v) in encodings
print!(": n", k, encoding_to_string(v));



pub fn print_byte_comparison(unencoded: &String, encoded: &Vec<bool>)
print!("data: n", encoding_to_string(&encoded));
print!("original: bytesnencoded: ~bytesn", unencoded.len(), encoded.len() / 8);


// debug

#[cfg(test)]
mod tests

use super::*;

#[test]
fn encode_decode_empty_input()
let input = "".to_string();
let (t, d) = encode(&input);

assert!(t.is_none());
assert!(d.is_empty());

let o = decode(&d, &t).unwrap();

assert_eq!(o, input);


#[test]
fn encode_decode_single_char_input()
let input = "aaaaaaa".to_string();
let (t, d) = encode(&input);

assert!(!t.is_none());
assert!(!d.is_empty());

let o = decode(&d, &t).unwrap();

assert_eq!(o, input);


#[test]
fn encode_decode_two_char_input()
let input = "abaabbaa".to_string();
let (t, d) = encode(&input);

assert!(!t.is_none());
assert!(!d.is_empty());

let o = decode(&d, &t).unwrap();

assert_eq!(o, input);


#[test]
fn encode_decode_three_char_input()
let input = " abaa bba a".to_string();
let (t, d) = encode(&input);

assert!(!t.is_none());
assert!(!d.is_empty());

let o = decode(&d, &t).unwrap();

assert_eq!(o, input);


#[test]
fn encode_decode_utf8_input()
let input = "𝄞 αβ 忠犬ハチ公 hi".to_string();
let (t, d) = encode(&input);

assert!(!t.is_none());
assert!(!d.is_empty());

let o = decode(&d, &t).unwrap();

assert_eq!(o, input);


#[test]
fn decode_empty_encoded_data()

let input = "𝄞 αβ 忠犬ハチ公 hi asdkfweiuryhfiusdf".to_string();
let (t, d) = encode(&input);

assert!(!t.is_none());
assert!(!d.is_empty());

let o = decode(&Vec::new(), &t).unwrap();

assert!(o.is_empty());


#[test]
#[should_panic(expected = "reached end of input")]
fn decode_corrupt_encoded_data()

let input = "𝄞 αβ 忠犬ハチ公 hi asdkfweiuryhfiusdf".to_string();
let (t, mut d) = encode(&input);

assert!(!t.is_none());
assert!(!d.is_empty());

d.pop();
d.pop();

let _ = decode(&d, &t).unwrap();


#[test]
#[should_panic(expected = "reached null node")]
fn decode_corrupt_tree()

let input = "𝄞 αβ 忠犬ハチ公 hi asdkfweiuryhfiusdf".to_string();
let (mut t, d) = encode(&input);

assert!(!t.is_none());
assert!(!d.is_empty());

t.as_mut().unwrap().left = None;

let _ = decode(&d, &t).unwrap();


// tests

// huff


#[cfg(not(test))]
fn main()

let input = "this is an example for huffman encoding";
print!("input: n", input);

let (tree, data) = huff::encode(&input.to_string());

let output = huff::decode(&data, &tree).unwrap();
print!("output: n", output);



Things I wonder about:



  • It seems like a lot of trait implementations are required to put something in a BinaryHeap... is there a simpler way?

  • The compiler claims std::fmt::Write is unused, but if I comment it out, it doesn't compile.

  • The version of Rust I have installed is kinda old (rustc 1.3.0 (9a92aaf19 2015-09-15)) as I've been without internet. Are there things that could be improved with a more modern version?

Any other feedback is welcome.







share|improve this question



























    up vote
    3
    down vote

    favorite












    An implementation of Huffman encoding / decoding as a learning exercise. I'm still new to Rust, so any tips about making things more natural for the language are appreciated.



    mod huff 

    use std::boxed::Box;
    use std::cmp::Ordering;
    use std::collections::BTreeMap;
    use std::collections::BinaryHeap;
    use std::fmt::Write;

    pub struct SymbolFreq
    symbol: char,
    frequency: u32,


    fn make_frequency_table(input: &String) -> Vec<SymbolFreq> a.frequency.cmp(&b.frequency));

    result


    pub struct Node
    left : Option<Box<Node>>,
    right : Option<Box<Node>>,
    symbol_freq : SymbolFreq,


    impl Node
    fn is_leaf(&self) -> bool
    self.left.is_none() && self.right.is_none()



    impl PartialEq for Node
    fn eq(&self, other: &Self) -> bool
    self.symbol_freq.frequency.eq(&other.symbol_freq.frequency)



    impl Eq for Node

    impl PartialOrd for Node
    fn partial_cmp(&self, other: &Self) -> Option<Ordering>
    other.symbol_freq.frequency.partial_cmp(&self.symbol_freq.frequency)



    impl Ord for Node
    fn cmp(&self, other: &Self) -> Ordering
    other.symbol_freq.frequency.cmp(&self.symbol_freq.frequency)



    fn make_heap_node(symbol_freq: SymbolFreq) -> Box<Node>
    Box::new(Node left: None, right: None, symbol_freq: symbol_freq )


    fn make_combined_heap_node(left: Box<Node>, right: Box<Node>) -> Box<Node>

    let symbol_freq = SymbolFreq
    symbol: 0 as char,
    frequency: left.symbol_freq.frequency + right.symbol_freq.frequency
    ;

    Box::new(Node left: Some(left), right: Some(right), symbol_freq: symbol_freq )


    fn make_queue(input: Vec<SymbolFreq>) -> BinaryHeap<Box<Node>>

    let mut freqs = BinaryHeap::with_capacity(input.len());

    for e in input
    freqs.push(make_heap_node(e));


    freqs


    fn make_tree(mut input: BinaryHeap<Box<Node>>) -> Option<Box<Node>>

    if input.is_empty()
    return None;


    let root;

    loop

    assert!(!input.is_empty());

    let l = input.pop().unwrap();

    let r = match input.pop()
    Some(x) => x,
    None => root = l; break; ,
    ;

    let n = make_combined_heap_node(l, r);

    match input.is_empty()
    true => root = n; break; ,
    false => input.push(n),



    assert!(input.is_empty());

    Some(root)


    fn make_encoding_table(input: &Option<Box<Node>>, mut encoding: Vec<bool>, output: &mut BTreeMap<char, Vec<bool>>)

    let n = match input.as_ref()
    Some(x) => x,
    None => return,
    ;

    if n.is_leaf()

    if encoding.is_empty() // root is leaf! -> add extra bit
    encoding.push(false);


    assert!(output.insert(n.symbol_freq.symbol, encoding).is_none());

    else

    let mut l = encoding.clone();
    l.push(false);
    make_encoding_table(&n.left, l, output);

    let mut r = encoding;
    r.push(true);
    make_encoding_table(&n.right, r, output);



    fn make_encodings(input: &String) -> (Option<Box<Node>>, BTreeMap<char, Vec<bool>>)

    let freqs = make_frequency_table(input);
    debug::print_frequencies(&freqs);

    let queue = make_queue(freqs);
    debug::print_queue(&queue);

    let root = make_tree(queue);
    debug::print_tree(&root);

    let mut encodings = BTreeMap::new();
    make_encoding_table(&root, Vec::new(), &mut encodings);

    (root, encodings)


    fn do_encoding(input: &String, encodings: &BTreeMap<char, Vec<bool>>) -> Vec<bool>

    let mut result = Vec::new();

    for c in input.chars()
    result.extend(encodings.get(&c).unwrap());


    result


    fn do_decoding(input: &Vec<bool>, root: &Option<Box<Node>>) -> Result<String, &'static str>

    let mut result = String::new();
    let mut i = 0usize;

    while i != input.len()

    let mut next = root;

    loop

    let node = match next.as_ref()
    Some(x) => x,
    None => return Err("do_decoding() failed: reached null node in tree."),
    ;

    if node.is_leaf()
    result.push(node.symbol_freq.symbol);
    break;


    if i == input.len()
    return Err("do_decoding() failed: reached end of input partway through decoding char.");


    next = match input[i]
    false => &node.left,
    true => &node.right,
    ;

    i += 1;


    if next == root // root is leaf! -> always consume at least one bit!
    i += 1;



    Ok(result)



    pub type Tree = Option<Box<Node>>;
    pub type EncodedData = Vec<bool>;

    pub fn encode(input: &String) -> (Tree, EncodedData)

    let (tree, encodings) = make_encodings(input);
    debug::print_encodings(&encodings);

    let encoded_data = do_encoding(input, &encodings);
    debug::print_byte_comparison(input, &encoded_data);

    (tree, encoded_data)


    pub fn decode(input: &EncodedData, tree: &Tree) -> Result<String, &'static str>
    do_decoding(input, tree)



    mod debug

    use super::*;
    use std::collections::BTreeMap;
    use std::collections::BinaryHeap;
    use std::fmt::Write;

    pub fn print_frequencies(frequencies: &Vec<SymbolFreq>)
    println!("frequencies:");
    for e in frequencies.iter()
    println!("t(, )", e.symbol, e.frequency);



    pub fn print_queue(queue: &BinaryHeap<Box<Node>>)
    println!("queue:");
    for n in queue
    println!("t(, )", n.symbol_freq.symbol, n.symbol_freq.frequency);



    fn print_node(node: &Option<Box<Node>>, indent: u32)
    let n = match node.as_ref()
    Some(x) => x,
    None => return,
    ;

    for _ in 0..indent
    print!("t");

    println!("(, )", n.symbol_freq.symbol, n.symbol_freq.frequency);

    print_node(&n.left, indent + 1);
    print_node(&n.right, indent + 1);


    pub fn print_tree(root: &Option<Box<Node>>)
    println!("tree:");
    print_node(&root, 0);


    fn encoding_to_string(encoding: &Vec<bool>) -> String
    let mut s = String::with_capacity(encoding.len());
    for b in encoding
    write!(&mut s, "", *b as u8).unwrap();

    s


    pub fn print_encodings(encodings: &BTreeMap<char, Vec<bool>>)
    println!("encodings:");
    for (k, v) in encodings
    print!(": n", k, encoding_to_string(v));



    pub fn print_byte_comparison(unencoded: &String, encoded: &Vec<bool>)
    print!("data: n", encoding_to_string(&encoded));
    print!("original: bytesnencoded: ~bytesn", unencoded.len(), encoded.len() / 8);


    // debug

    #[cfg(test)]
    mod tests

    use super::*;

    #[test]
    fn encode_decode_empty_input()
    let input = "".to_string();
    let (t, d) = encode(&input);

    assert!(t.is_none());
    assert!(d.is_empty());

    let o = decode(&d, &t).unwrap();

    assert_eq!(o, input);


    #[test]
    fn encode_decode_single_char_input()
    let input = "aaaaaaa".to_string();
    let (t, d) = encode(&input);

    assert!(!t.is_none());
    assert!(!d.is_empty());

    let o = decode(&d, &t).unwrap();

    assert_eq!(o, input);


    #[test]
    fn encode_decode_two_char_input()
    let input = "abaabbaa".to_string();
    let (t, d) = encode(&input);

    assert!(!t.is_none());
    assert!(!d.is_empty());

    let o = decode(&d, &t).unwrap();

    assert_eq!(o, input);


    #[test]
    fn encode_decode_three_char_input()
    let input = " abaa bba a".to_string();
    let (t, d) = encode(&input);

    assert!(!t.is_none());
    assert!(!d.is_empty());

    let o = decode(&d, &t).unwrap();

    assert_eq!(o, input);


    #[test]
    fn encode_decode_utf8_input()
    let input = "𝄞 αβ 忠犬ハチ公 hi".to_string();
    let (t, d) = encode(&input);

    assert!(!t.is_none());
    assert!(!d.is_empty());

    let o = decode(&d, &t).unwrap();

    assert_eq!(o, input);


    #[test]
    fn decode_empty_encoded_data()

    let input = "𝄞 αβ 忠犬ハチ公 hi asdkfweiuryhfiusdf".to_string();
    let (t, d) = encode(&input);

    assert!(!t.is_none());
    assert!(!d.is_empty());

    let o = decode(&Vec::new(), &t).unwrap();

    assert!(o.is_empty());


    #[test]
    #[should_panic(expected = "reached end of input")]
    fn decode_corrupt_encoded_data()

    let input = "𝄞 αβ 忠犬ハチ公 hi asdkfweiuryhfiusdf".to_string();
    let (t, mut d) = encode(&input);

    assert!(!t.is_none());
    assert!(!d.is_empty());

    d.pop();
    d.pop();

    let _ = decode(&d, &t).unwrap();


    #[test]
    #[should_panic(expected = "reached null node")]
    fn decode_corrupt_tree()

    let input = "𝄞 αβ 忠犬ハチ公 hi asdkfweiuryhfiusdf".to_string();
    let (mut t, d) = encode(&input);

    assert!(!t.is_none());
    assert!(!d.is_empty());

    t.as_mut().unwrap().left = None;

    let _ = decode(&d, &t).unwrap();


    // tests

    // huff


    #[cfg(not(test))]
    fn main()

    let input = "this is an example for huffman encoding";
    print!("input: n", input);

    let (tree, data) = huff::encode(&input.to_string());

    let output = huff::decode(&data, &tree).unwrap();
    print!("output: n", output);



    Things I wonder about:



    • It seems like a lot of trait implementations are required to put something in a BinaryHeap... is there a simpler way?

    • The compiler claims std::fmt::Write is unused, but if I comment it out, it doesn't compile.

    • The version of Rust I have installed is kinda old (rustc 1.3.0 (9a92aaf19 2015-09-15)) as I've been without internet. Are there things that could be improved with a more modern version?

    Any other feedback is welcome.







    share|improve this question























      up vote
      3
      down vote

      favorite









      up vote
      3
      down vote

      favorite











      An implementation of Huffman encoding / decoding as a learning exercise. I'm still new to Rust, so any tips about making things more natural for the language are appreciated.



      mod huff 

      use std::boxed::Box;
      use std::cmp::Ordering;
      use std::collections::BTreeMap;
      use std::collections::BinaryHeap;
      use std::fmt::Write;

      pub struct SymbolFreq
      symbol: char,
      frequency: u32,


      fn make_frequency_table(input: &String) -> Vec<SymbolFreq> a.frequency.cmp(&b.frequency));

      result


      pub struct Node
      left : Option<Box<Node>>,
      right : Option<Box<Node>>,
      symbol_freq : SymbolFreq,


      impl Node
      fn is_leaf(&self) -> bool
      self.left.is_none() && self.right.is_none()



      impl PartialEq for Node
      fn eq(&self, other: &Self) -> bool
      self.symbol_freq.frequency.eq(&other.symbol_freq.frequency)



      impl Eq for Node

      impl PartialOrd for Node
      fn partial_cmp(&self, other: &Self) -> Option<Ordering>
      other.symbol_freq.frequency.partial_cmp(&self.symbol_freq.frequency)



      impl Ord for Node
      fn cmp(&self, other: &Self) -> Ordering
      other.symbol_freq.frequency.cmp(&self.symbol_freq.frequency)



      fn make_heap_node(symbol_freq: SymbolFreq) -> Box<Node>
      Box::new(Node left: None, right: None, symbol_freq: symbol_freq )


      fn make_combined_heap_node(left: Box<Node>, right: Box<Node>) -> Box<Node>

      let symbol_freq = SymbolFreq
      symbol: 0 as char,
      frequency: left.symbol_freq.frequency + right.symbol_freq.frequency
      ;

      Box::new(Node left: Some(left), right: Some(right), symbol_freq: symbol_freq )


      fn make_queue(input: Vec<SymbolFreq>) -> BinaryHeap<Box<Node>>

      let mut freqs = BinaryHeap::with_capacity(input.len());

      for e in input
      freqs.push(make_heap_node(e));


      freqs


      fn make_tree(mut input: BinaryHeap<Box<Node>>) -> Option<Box<Node>>

      if input.is_empty()
      return None;


      let root;

      loop

      assert!(!input.is_empty());

      let l = input.pop().unwrap();

      let r = match input.pop()
      Some(x) => x,
      None => root = l; break; ,
      ;

      let n = make_combined_heap_node(l, r);

      match input.is_empty()
      true => root = n; break; ,
      false => input.push(n),



      assert!(input.is_empty());

      Some(root)


      fn make_encoding_table(input: &Option<Box<Node>>, mut encoding: Vec<bool>, output: &mut BTreeMap<char, Vec<bool>>)

      let n = match input.as_ref()
      Some(x) => x,
      None => return,
      ;

      if n.is_leaf()

      if encoding.is_empty() // root is leaf! -> add extra bit
      encoding.push(false);


      assert!(output.insert(n.symbol_freq.symbol, encoding).is_none());

      else

      let mut l = encoding.clone();
      l.push(false);
      make_encoding_table(&n.left, l, output);

      let mut r = encoding;
      r.push(true);
      make_encoding_table(&n.right, r, output);



      fn make_encodings(input: &String) -> (Option<Box<Node>>, BTreeMap<char, Vec<bool>>)

      let freqs = make_frequency_table(input);
      debug::print_frequencies(&freqs);

      let queue = make_queue(freqs);
      debug::print_queue(&queue);

      let root = make_tree(queue);
      debug::print_tree(&root);

      let mut encodings = BTreeMap::new();
      make_encoding_table(&root, Vec::new(), &mut encodings);

      (root, encodings)


      fn do_encoding(input: &String, encodings: &BTreeMap<char, Vec<bool>>) -> Vec<bool>

      let mut result = Vec::new();

      for c in input.chars()
      result.extend(encodings.get(&c).unwrap());


      result


      fn do_decoding(input: &Vec<bool>, root: &Option<Box<Node>>) -> Result<String, &'static str>

      let mut result = String::new();
      let mut i = 0usize;

      while i != input.len()

      let mut next = root;

      loop

      let node = match next.as_ref()
      Some(x) => x,
      None => return Err("do_decoding() failed: reached null node in tree."),
      ;

      if node.is_leaf()
      result.push(node.symbol_freq.symbol);
      break;


      if i == input.len()
      return Err("do_decoding() failed: reached end of input partway through decoding char.");


      next = match input[i]
      false => &node.left,
      true => &node.right,
      ;

      i += 1;


      if next == root // root is leaf! -> always consume at least one bit!
      i += 1;



      Ok(result)



      pub type Tree = Option<Box<Node>>;
      pub type EncodedData = Vec<bool>;

      pub fn encode(input: &String) -> (Tree, EncodedData)

      let (tree, encodings) = make_encodings(input);
      debug::print_encodings(&encodings);

      let encoded_data = do_encoding(input, &encodings);
      debug::print_byte_comparison(input, &encoded_data);

      (tree, encoded_data)


      pub fn decode(input: &EncodedData, tree: &Tree) -> Result<String, &'static str>
      do_decoding(input, tree)



      mod debug

      use super::*;
      use std::collections::BTreeMap;
      use std::collections::BinaryHeap;
      use std::fmt::Write;

      pub fn print_frequencies(frequencies: &Vec<SymbolFreq>)
      println!("frequencies:");
      for e in frequencies.iter()
      println!("t(, )", e.symbol, e.frequency);



      pub fn print_queue(queue: &BinaryHeap<Box<Node>>)
      println!("queue:");
      for n in queue
      println!("t(, )", n.symbol_freq.symbol, n.symbol_freq.frequency);



      fn print_node(node: &Option<Box<Node>>, indent: u32)
      let n = match node.as_ref()
      Some(x) => x,
      None => return,
      ;

      for _ in 0..indent
      print!("t");

      println!("(, )", n.symbol_freq.symbol, n.symbol_freq.frequency);

      print_node(&n.left, indent + 1);
      print_node(&n.right, indent + 1);


      pub fn print_tree(root: &Option<Box<Node>>)
      println!("tree:");
      print_node(&root, 0);


      fn encoding_to_string(encoding: &Vec<bool>) -> String
      let mut s = String::with_capacity(encoding.len());
      for b in encoding
      write!(&mut s, "", *b as u8).unwrap();

      s


      pub fn print_encodings(encodings: &BTreeMap<char, Vec<bool>>)
      println!("encodings:");
      for (k, v) in encodings
      print!(": n", k, encoding_to_string(v));



      pub fn print_byte_comparison(unencoded: &String, encoded: &Vec<bool>)
      print!("data: n", encoding_to_string(&encoded));
      print!("original: bytesnencoded: ~bytesn", unencoded.len(), encoded.len() / 8);


      // debug

      #[cfg(test)]
      mod tests

      use super::*;

      #[test]
      fn encode_decode_empty_input()
      let input = "".to_string();
      let (t, d) = encode(&input);

      assert!(t.is_none());
      assert!(d.is_empty());

      let o = decode(&d, &t).unwrap();

      assert_eq!(o, input);


      #[test]
      fn encode_decode_single_char_input()
      let input = "aaaaaaa".to_string();
      let (t, d) = encode(&input);

      assert!(!t.is_none());
      assert!(!d.is_empty());

      let o = decode(&d, &t).unwrap();

      assert_eq!(o, input);


      #[test]
      fn encode_decode_two_char_input()
      let input = "abaabbaa".to_string();
      let (t, d) = encode(&input);

      assert!(!t.is_none());
      assert!(!d.is_empty());

      let o = decode(&d, &t).unwrap();

      assert_eq!(o, input);


      #[test]
      fn encode_decode_three_char_input()
      let input = " abaa bba a".to_string();
      let (t, d) = encode(&input);

      assert!(!t.is_none());
      assert!(!d.is_empty());

      let o = decode(&d, &t).unwrap();

      assert_eq!(o, input);


      #[test]
      fn encode_decode_utf8_input()
      let input = "𝄞 αβ 忠犬ハチ公 hi".to_string();
      let (t, d) = encode(&input);

      assert!(!t.is_none());
      assert!(!d.is_empty());

      let o = decode(&d, &t).unwrap();

      assert_eq!(o, input);


      #[test]
      fn decode_empty_encoded_data()

      let input = "𝄞 αβ 忠犬ハチ公 hi asdkfweiuryhfiusdf".to_string();
      let (t, d) = encode(&input);

      assert!(!t.is_none());
      assert!(!d.is_empty());

      let o = decode(&Vec::new(), &t).unwrap();

      assert!(o.is_empty());


      #[test]
      #[should_panic(expected = "reached end of input")]
      fn decode_corrupt_encoded_data()

      let input = "𝄞 αβ 忠犬ハチ公 hi asdkfweiuryhfiusdf".to_string();
      let (t, mut d) = encode(&input);

      assert!(!t.is_none());
      assert!(!d.is_empty());

      d.pop();
      d.pop();

      let _ = decode(&d, &t).unwrap();


      #[test]
      #[should_panic(expected = "reached null node")]
      fn decode_corrupt_tree()

      let input = "𝄞 αβ 忠犬ハチ公 hi asdkfweiuryhfiusdf".to_string();
      let (mut t, d) = encode(&input);

      assert!(!t.is_none());
      assert!(!d.is_empty());

      t.as_mut().unwrap().left = None;

      let _ = decode(&d, &t).unwrap();


      // tests

      // huff


      #[cfg(not(test))]
      fn main()

      let input = "this is an example for huffman encoding";
      print!("input: n", input);

      let (tree, data) = huff::encode(&input.to_string());

      let output = huff::decode(&data, &tree).unwrap();
      print!("output: n", output);



      Things I wonder about:



      • It seems like a lot of trait implementations are required to put something in a BinaryHeap... is there a simpler way?

      • The compiler claims std::fmt::Write is unused, but if I comment it out, it doesn't compile.

      • The version of Rust I have installed is kinda old (rustc 1.3.0 (9a92aaf19 2015-09-15)) as I've been without internet. Are there things that could be improved with a more modern version?

      Any other feedback is welcome.







      share|improve this question













      An implementation of Huffman encoding / decoding as a learning exercise. I'm still new to Rust, so any tips about making things more natural for the language are appreciated.



      mod huff 

      use std::boxed::Box;
      use std::cmp::Ordering;
      use std::collections::BTreeMap;
      use std::collections::BinaryHeap;
      use std::fmt::Write;

      pub struct SymbolFreq
      symbol: char,
      frequency: u32,


      fn make_frequency_table(input: &String) -> Vec<SymbolFreq> a.frequency.cmp(&b.frequency));

      result


      pub struct Node
      left : Option<Box<Node>>,
      right : Option<Box<Node>>,
      symbol_freq : SymbolFreq,


      impl Node
      fn is_leaf(&self) -> bool
      self.left.is_none() && self.right.is_none()



      impl PartialEq for Node
      fn eq(&self, other: &Self) -> bool
      self.symbol_freq.frequency.eq(&other.symbol_freq.frequency)



      impl Eq for Node

      impl PartialOrd for Node
      fn partial_cmp(&self, other: &Self) -> Option<Ordering>
      other.symbol_freq.frequency.partial_cmp(&self.symbol_freq.frequency)



      impl Ord for Node
      fn cmp(&self, other: &Self) -> Ordering
      other.symbol_freq.frequency.cmp(&self.symbol_freq.frequency)



      fn make_heap_node(symbol_freq: SymbolFreq) -> Box<Node>
      Box::new(Node left: None, right: None, symbol_freq: symbol_freq )


      fn make_combined_heap_node(left: Box<Node>, right: Box<Node>) -> Box<Node>

      let symbol_freq = SymbolFreq
      symbol: 0 as char,
      frequency: left.symbol_freq.frequency + right.symbol_freq.frequency
      ;

      Box::new(Node left: Some(left), right: Some(right), symbol_freq: symbol_freq )


      fn make_queue(input: Vec<SymbolFreq>) -> BinaryHeap<Box<Node>>

      let mut freqs = BinaryHeap::with_capacity(input.len());

      for e in input
      freqs.push(make_heap_node(e));


      freqs


      fn make_tree(mut input: BinaryHeap<Box<Node>>) -> Option<Box<Node>>

      if input.is_empty()
      return None;


      let root;

      loop

      assert!(!input.is_empty());

      let l = input.pop().unwrap();

      let r = match input.pop()
      Some(x) => x,
      None => root = l; break; ,
      ;

      let n = make_combined_heap_node(l, r);

      match input.is_empty()
      true => root = n; break; ,
      false => input.push(n),



      assert!(input.is_empty());

      Some(root)


      fn make_encoding_table(input: &Option<Box<Node>>, mut encoding: Vec<bool>, output: &mut BTreeMap<char, Vec<bool>>)

      let n = match input.as_ref()
      Some(x) => x,
      None => return,
      ;

      if n.is_leaf()

      if encoding.is_empty() // root is leaf! -> add extra bit
      encoding.push(false);


      assert!(output.insert(n.symbol_freq.symbol, encoding).is_none());

      else

      let mut l = encoding.clone();
      l.push(false);
      make_encoding_table(&n.left, l, output);

      let mut r = encoding;
      r.push(true);
      make_encoding_table(&n.right, r, output);



      fn make_encodings(input: &String) -> (Option<Box<Node>>, BTreeMap<char, Vec<bool>>)

      let freqs = make_frequency_table(input);
      debug::print_frequencies(&freqs);

      let queue = make_queue(freqs);
      debug::print_queue(&queue);

      let root = make_tree(queue);
      debug::print_tree(&root);

      let mut encodings = BTreeMap::new();
      make_encoding_table(&root, Vec::new(), &mut encodings);

      (root, encodings)


      fn do_encoding(input: &String, encodings: &BTreeMap<char, Vec<bool>>) -> Vec<bool>

      let mut result = Vec::new();

      for c in input.chars()
      result.extend(encodings.get(&c).unwrap());


      result


      fn do_decoding(input: &Vec<bool>, root: &Option<Box<Node>>) -> Result<String, &'static str>

      let mut result = String::new();
      let mut i = 0usize;

      while i != input.len()

      let mut next = root;

      loop

      let node = match next.as_ref()
      Some(x) => x,
      None => return Err("do_decoding() failed: reached null node in tree."),
      ;

      if node.is_leaf()
      result.push(node.symbol_freq.symbol);
      break;


      if i == input.len()
      return Err("do_decoding() failed: reached end of input partway through decoding char.");


      next = match input[i]
      false => &node.left,
      true => &node.right,
      ;

      i += 1;


      if next == root // root is leaf! -> always consume at least one bit!
      i += 1;



      Ok(result)



      pub type Tree = Option<Box<Node>>;
      pub type EncodedData = Vec<bool>;

      pub fn encode(input: &String) -> (Tree, EncodedData)

      let (tree, encodings) = make_encodings(input);
      debug::print_encodings(&encodings);

      let encoded_data = do_encoding(input, &encodings);
      debug::print_byte_comparison(input, &encoded_data);

      (tree, encoded_data)


      pub fn decode(input: &EncodedData, tree: &Tree) -> Result<String, &'static str>
      do_decoding(input, tree)



      mod debug

      use super::*;
      use std::collections::BTreeMap;
      use std::collections::BinaryHeap;
      use std::fmt::Write;

      pub fn print_frequencies(frequencies: &Vec<SymbolFreq>)
      println!("frequencies:");
      for e in frequencies.iter()
      println!("t(, )", e.symbol, e.frequency);



      pub fn print_queue(queue: &BinaryHeap<Box<Node>>)
      println!("queue:");
      for n in queue
      println!("t(, )", n.symbol_freq.symbol, n.symbol_freq.frequency);



      fn print_node(node: &Option<Box<Node>>, indent: u32)
      let n = match node.as_ref()
      Some(x) => x,
      None => return,
      ;

      for _ in 0..indent
      print!("t");

      println!("(, )", n.symbol_freq.symbol, n.symbol_freq.frequency);

      print_node(&n.left, indent + 1);
      print_node(&n.right, indent + 1);


      pub fn print_tree(root: &Option<Box<Node>>)
      println!("tree:");
      print_node(&root, 0);


      fn encoding_to_string(encoding: &Vec<bool>) -> String
      let mut s = String::with_capacity(encoding.len());
      for b in encoding
      write!(&mut s, "", *b as u8).unwrap();

      s


      pub fn print_encodings(encodings: &BTreeMap<char, Vec<bool>>)
      println!("encodings:");
      for (k, v) in encodings
      print!(": n", k, encoding_to_string(v));



      pub fn print_byte_comparison(unencoded: &String, encoded: &Vec<bool>)
      print!("data: n", encoding_to_string(&encoded));
      print!("original: bytesnencoded: ~bytesn", unencoded.len(), encoded.len() / 8);


      // debug

      #[cfg(test)]
      mod tests

      use super::*;

      #[test]
      fn encode_decode_empty_input()
      let input = "".to_string();
      let (t, d) = encode(&input);

      assert!(t.is_none());
      assert!(d.is_empty());

      let o = decode(&d, &t).unwrap();

      assert_eq!(o, input);


      #[test]
      fn encode_decode_single_char_input()
      let input = "aaaaaaa".to_string();
      let (t, d) = encode(&input);

      assert!(!t.is_none());
      assert!(!d.is_empty());

      let o = decode(&d, &t).unwrap();

      assert_eq!(o, input);


      #[test]
      fn encode_decode_two_char_input()
      let input = "abaabbaa".to_string();
      let (t, d) = encode(&input);

      assert!(!t.is_none());
      assert!(!d.is_empty());

      let o = decode(&d, &t).unwrap();

      assert_eq!(o, input);


      #[test]
      fn encode_decode_three_char_input()
      let input = " abaa bba a".to_string();
      let (t, d) = encode(&input);

      assert!(!t.is_none());
      assert!(!d.is_empty());

      let o = decode(&d, &t).unwrap();

      assert_eq!(o, input);


      #[test]
      fn encode_decode_utf8_input()
      let input = "𝄞 αβ 忠犬ハチ公 hi".to_string();
      let (t, d) = encode(&input);

      assert!(!t.is_none());
      assert!(!d.is_empty());

      let o = decode(&d, &t).unwrap();

      assert_eq!(o, input);


      #[test]
      fn decode_empty_encoded_data()

      let input = "𝄞 αβ 忠犬ハチ公 hi asdkfweiuryhfiusdf".to_string();
      let (t, d) = encode(&input);

      assert!(!t.is_none());
      assert!(!d.is_empty());

      let o = decode(&Vec::new(), &t).unwrap();

      assert!(o.is_empty());


      #[test]
      #[should_panic(expected = "reached end of input")]
      fn decode_corrupt_encoded_data()

      let input = "𝄞 αβ 忠犬ハチ公 hi asdkfweiuryhfiusdf".to_string();
      let (t, mut d) = encode(&input);

      assert!(!t.is_none());
      assert!(!d.is_empty());

      d.pop();
      d.pop();

      let _ = decode(&d, &t).unwrap();


      #[test]
      #[should_panic(expected = "reached null node")]
      fn decode_corrupt_tree()

      let input = "𝄞 αβ 忠犬ハチ公 hi asdkfweiuryhfiusdf".to_string();
      let (mut t, d) = encode(&input);

      assert!(!t.is_none());
      assert!(!d.is_empty());

      t.as_mut().unwrap().left = None;

      let _ = decode(&d, &t).unwrap();


      // tests

      // huff


      #[cfg(not(test))]
      fn main()

      let input = "this is an example for huffman encoding";
      print!("input: n", input);

      let (tree, data) = huff::encode(&input.to_string());

      let output = huff::decode(&data, &tree).unwrap();
      print!("output: n", output);



      Things I wonder about:



      • It seems like a lot of trait implementations are required to put something in a BinaryHeap... is there a simpler way?

      • The compiler claims std::fmt::Write is unused, but if I comment it out, it doesn't compile.

      • The version of Rust I have installed is kinda old (rustc 1.3.0 (9a92aaf19 2015-09-15)) as I've been without internet. Are there things that could be improved with a more modern version?

      Any other feedback is welcome.









      share|improve this question












      share|improve this question




      share|improve this question








      edited Mar 23 at 22:47









      Shepmaster

      6,5981018




      6,5981018









      asked Mar 19 at 16:08









      user673679

      1,042518




      1,042518

























          active

          oldest

          votes











          Your Answer




          StackExchange.ifUsing("editor", function ()
          return StackExchange.using("mathjaxEditing", function ()
          StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
          StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
          );
          );
          , "mathjax-editing");

          StackExchange.ifUsing("editor", function ()
          StackExchange.using("externalEditor", function ()
          StackExchange.using("snippets", function ()
          StackExchange.snippets.init();
          );
          );
          , "code-snippets");

          StackExchange.ready(function()
          var channelOptions =
          tags: "".split(" "),
          id: "196"
          ;
          initTagRenderer("".split(" "), "".split(" "), channelOptions);

          StackExchange.using("externalEditor", function()
          // Have to fire editor after snippets, if snippets enabled
          if (StackExchange.settings.snippets.snippetsEnabled)
          StackExchange.using("snippets", function()
          createEditor();
          );

          else
          createEditor();

          );

          function createEditor()
          StackExchange.prepareEditor(
          heartbeatType: 'answer',
          convertImagesToLinks: false,
          noModals: false,
          showLowRepImageUploadWarning: true,
          reputationToPostImages: null,
          bindNavPrevention: true,
          postfix: "",
          onDemand: true,
          discardSelector: ".discard-answer"
          ,immediatelyShowMarkdownHelp:true
          );



          );








           

          draft saved


          draft discarded


















          StackExchange.ready(
          function ()
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f189943%2fhuffman-encoding-in-rust%23new-answer', 'question_page');

          );

          Post as a guest



































          active

          oldest

          votes













          active

          oldest

          votes









          active

          oldest

          votes






          active

          oldest

          votes










           

          draft saved


          draft discarded


























           


          draft saved


          draft discarded














          StackExchange.ready(
          function ()
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f189943%2fhuffman-encoding-in-rust%23new-answer', 'question_page');

          );

          Post as a guest













































































          Popular posts from this blog

          Greedy Best First Search implementation in Rust

          Function to Return a JSON Like Objects Using VBA Collections and Arrays

          C++11 CLH Lock Implementation