/** * huffman.scala - Computes huffman compression algorithm. * * * Autor: Pedro Garcia Freitas [sawp@sawp.com.br] * License: Creative Commons * <http: //creativecommons.org/licenses/by-nc-nd/2.5/en/> * * Apr 2011 */ type Node = (Double, Any) def huffman(list: List[(Double, String)]): List[(String, String)] = { def appendCode(m: String, x: List[Node]): List[Node] = for ((i,j) < - x) yield (i, m + j) def sortByFrequency(x: List[Node]) = { def sorter(e1: Node, e2: Node) = if (e1._1 < e2._1) true else false x.sortWith(sorter) } def group(l: List[Node]): List[Node] = { val ll = sortByFrequency(l) ll match { case Tuple2(f1: Double, f2: List[Node]) :: Tuple2(g1: Double, g2: List[Node]) :: t => group((f1 + g1, appendCode("0", f2) ::: appendCode("1", g2)) :: t) case _ => ll } } def combining(e: List[Node]) = e match { case c :: t => c._2 case _ => error("Not defined") } val k = for ((i,j) < - list) yield (i, List((j, ""))) val combined = group(k) val result = combining(combined) result.asInstanceOf[List[(String, String)]] } def calculateAverageLength(probs: List[(Double, String)], codes: List[(String, String)]): Double = { def getProbabilities(probs: List[(Double, String)]) = for ((prob, letter) <- probs) yield prob def getCodeLength(cods: List[(String, String)]) = for ((letter, code) <- cods) yield code.length def calculateAverage(codeLengths: List[Int], probs: List[Double]) = { val zipedCodesLengths = codeLengths.zip(probs) val len = for((codelen, prob) <- zipedCodesLengths) yield codelen * prob val average = len.sum average } val probabilities = getProbabilities(probs) val codeLengths = getCodeLength(codes) val average = calculateAverage(codeLengths, probabilities) average } def calculateEntropy(probs: List[(Double, String)]) = { def getProbabilities(probs: List[(Double, String)]) = for ((prob, letter) <- probs) yield prob def getLocalEntropy(probabilities: List[Double]) = for (prob <- probabilities) yield prob * (math.log(prob) / math.log(2.0)) def getEntropy(localEntropy: List[Double]) = -localEntropy.sum val probabilities = getProbabilities(probs) val localEntropy = getLocalEntropy(probabilities) val entropy = getEntropy(localEntropy) entropy } /* Tests */ val probs = (0.4, "A") :: (0.2, "B") :: (0.1, "C") :: (0.05, "D") :: (0.1, "E") :: (0.075, "F") :: (0.075, "G") :: Nil val codes = huffman(probs) val averagelen = calculateAverageLength(probs, codes) val entropy = calculateEntropy(probs) println("\n") println("Frequency and Symbols: " + probs) println("Huffman code: " + codes) println("Average length: " + averagelen) println("Entropy: " + entropy) |

*P.S. This also works for OpenBSD packages*.