Skip to content

Commit

Permalink
Merge pull request #82 from brianpzaide/master
Browse files Browse the repository at this point in the history
Implementing Huffman coding for the compression challenge
  • Loading branch information
plutov authored Nov 9, 2024
2 parents 852d839 + c51d5b2 commit 3fdc383
Show file tree
Hide file tree
Showing 2 changed files with 324 additions and 2 deletions.
237 changes: 235 additions & 2 deletions compression/compression.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,242 @@
package compression

import (
"bytes"
"fmt"
"log"
"strings"
"unicode/utf8"

"encoding/binary"
)

// HNode is a node of a Huffman tree. A leaf carries a character together
// with the number of times it occurs; an internal node is created with only
// the summed frequency of its two children and leaves char at its zero value.
type HNode struct {
	char  rune
	freq  int
	left  *HNode
	right *HNode
}

// Less implements Orderable: it reports whether hn has a strictly lower
// frequency than other. When other is not a *HNode the mismatch is logged
// and false is returned.
func (hn *HNode) Less(other Orderable) bool {
	if rhs, ok := other.(*HNode); ok {
		return hn.freq < rhs.freq
	}
	log.Printf("Type assertion failed: expected *HNode, got %T", other)
	return false
}

// buildHNodesPQ counts how often each rune occurs in chars and returns a
// minimum priority queue holding one leaf HNode per distinct rune, with that
// count as the node's frequency.
func buildHNodesPQ(chars []rune) *MinPQ[Orderable] {
	counts := make(map[rune]int)
	for i := range chars {
		counts[chars[i]]++
	}

	queue := NewMinPQ[Orderable]()
	for r, n := range counts {
		queue.Insert(&HNode{char: r, freq: n})
	}
	return queue
}

// buildHuffmannTree builds a Huffman tree from the nodes in hfNodesPQ. It
// repeatedly removes the two lowest-frequency nodes and re-inserts a parent
// whose frequency is their sum (first removed on the left, second on the
// right) until a single node remains; that node is the root.
//
// The queue is drained in the process. An empty queue yields a nil root.
// A queue error or an element that is not a *HNode terminates the program
// via the log package.
func buildHuffmannTree(hfNodesPQ *MinPQ[Orderable]) *HNode {
	// pop removes the current minimum. The error is checked BEFORE the type
	// assertion: on failure DeleteMin returns a nil interface, and asserting
	// on that would panic before the error could be reported.
	pop := func() *HNode {
		item, err := hfNodesPQ.DeleteMin()
		if err != nil {
			log.Fatalln(err.Error())
		}
		node, ok := item.(*HNode)
		if !ok {
			log.Fatalf("unexpected element type in priority queue: %T", item)
		}
		return node
	}

	var root *HNode
	for !hfNodesPQ.IsEmpty() {
		root = pop()
		if hfNodesPQ.IsEmpty() {
			// root was the last node left, so the tree is complete.
			break
		}
		second := pop()
		hfNodesPQ.Insert(&HNode{
			freq:  root.freq + second.freq,
			left:  root,
			right: second,
		})
	}

	return root
}

// buildCodeCharMap walks the Huffman tree rooted at root and returns the
// table used for decoding: each key is a Huffman code written as a string of
// '0' (left branch) and '1' (right branch) characters, and each value is the
// character stored in the leaf that code leads to.
//
// A tree that consists of a single leaf gets the one-symbol code "0", since
// the path from the root to itself would otherwise be empty. A nil root
// yields an empty map.
func buildCodeCharMap(root *HNode) map[string]rune {
	codeCharMap := make(map[string]rune)
	if root == nil {
		return codeCharMap
	}
	if root.left == nil && root.right == nil {
		codeCharMap["0"] = root.char
		return codeCharMap
	}

	// path holds the code of the node currently being visited; one symbol is
	// pushed before descending into a child and popped again afterwards.
	var path bytes.Buffer
	var walk func(node *HNode)
	walk = func(node *HNode) {
		if node.left == nil && node.right == nil {
			codeCharMap[path.String()] = node.char
			return
		}
		if node.left != nil {
			path.WriteByte('0')
			walk(node.left)
			path.Truncate(path.Len() - 1)
		}
		if node.right != nil {
			path.WriteByte('1')
			walk(node.right)
			path.Truncate(path.Len() - 1)
		}
	}
	walk(root)

	return codeCharMap
}

// serializeCodeCharMap flattens the code-to-character table into bytes.
// Every entry is written as
//
//	<code length: 1 byte><character length: 1 byte><code><character>
//
// where the code is the map key as-is and the character is its UTF-8
// encoding. Entries are written in map iteration order, so the order of
// entries in the result is not stable between calls.
//
// An error is returned when a character cannot be encoded as UTF-8, or when
// a code is longer than 255 bytes and therefore does not fit the one-byte
// length prefix (previously such a length was silently truncated).
func serializeCodeCharMap(codeCharMap map[string]rune) ([]byte, error) {
	// Largest code length representable in the single length byte.
	const maxCodeLen = 255

	var out bytes.Buffer
	for code, char := range codeCharMap {
		if len(code) > maxCodeLen {
			return nil, fmt.Errorf("code for %q is %d bytes long, maximum is %d", char, len(code), maxCodeLen)
		}
		charLen := utf8.RuneLen(char)
		if charLen == -1 {
			return nil, fmt.Errorf("invalid UTF-8 rune: %v", char)
		}

		// Length prefixes first, then the payload they describe.
		out.WriteByte(byte(len(code)))
		out.WriteByte(byte(charLen))
		out.WriteString(code)
		out.WriteRune(char)
	}

	return out.Bytes(), nil
}

// deserializeCodeCharMap rebuilds the code-to-character table from the first
// encodingMappingLen bytes of encodedBytesBuffer, consuming them. Each entry
// is expected in the form
//
//	<code length: 1 byte><character length: 1 byte><code><character>
//
// with the character stored as UTF-8. A non-positive encodingMappingLen
// yields an empty map.
//
// An error is returned when the buffer ends before an entry is complete, when
// an entry runs past encodingMappingLen, or when the character bytes are not
// exactly one valid UTF-8 encoded rune.
func deserializeCodeCharMap(encodedBytesBuffer *bytes.Buffer, encodingMappingLen int32) (map[string]rune, error) {
	codeCharMap := make(map[string]rune)
	total := int(encodingMappingLen)

	for n := 0; n < total; {
		keyLenByte, err := encodedBytesBuffer.ReadByte()
		if err != nil {
			return nil, fmt.Errorf("failed to read key length: %w", err)
		}
		valLenByte, err := encodedBytesBuffer.ReadByte()
		if err != nil {
			return nil, fmt.Errorf("failed to read value length: %w", err)
		}
		keyLen, valLen := int(keyLenByte), int(valLenByte)

		// Next returns fewer bytes than requested when the buffer runs dry,
		// which makes truncated input detectable (Read would report success
		// on a short read).
		key := encodedBytesBuffer.Next(keyLen)
		if len(key) != keyLen {
			return nil, fmt.Errorf("failed to read key: want %d bytes, got %d", keyLen, len(key))
		}
		// Copy now: the slice from Next is only valid until the next read.
		code := string(key)

		valueBytes := encodedBytesBuffer.Next(valLen)
		if len(valueBytes) != valLen {
			return nil, fmt.Errorf("failed to read value: want %d bytes, got %d", valLen, len(valueBytes))
		}

		// DecodeRune reports bad input as (RuneError, 0) or (RuneError, 1).
		// A genuine U+FFFD decodes with size 3 and must be accepted, so the
		// rune value alone is not enough to detect an error.
		value, size := utf8.DecodeRune(valueBytes)
		if size != valLen || (value == utf8.RuneError && size <= 1) {
			return nil, fmt.Errorf("invalid UTF-8 rune detected")
		}

		n += 2 + keyLen + valLen
		if n > total {
			return nil, fmt.Errorf("mapping entry ends at byte %d, past declared length %d", n, total)
		}
		codeCharMap[code] = value
	}

	return codeCharMap, nil
}

// encode the given string returns a string in the format: <length of encoding mapping><encoding mapping><encoded string>
func Encode(s string) string {
return s
chars := []rune(s)
pq := buildHNodesPQ(chars)
root := buildHuffmannTree(pq)

// codeCharMap will be used for decoding the string
codeCharMap := buildCodeCharMap(root)

var encodedBytesBuffer bytes.Buffer
encodingMappingBytes, err := serializeCodeCharMap(codeCharMap)
if err != nil {
log.Fatalln(err.Error())
}

// writing the length of the mapping used for encoding the given string s
err = binary.Write(&encodedBytesBuffer, binary.BigEndian, int32(len(encodingMappingBytes)))
if err != nil {
log.Fatalln("Error occured in writing the length of the encoding mapping to the buffer:", err)
}
// writing the encoding mapping itself
encodedBytesBuffer.Write(encodingMappingBytes)

// charCodeMap will be used for encoding the string
charCodeMap := make(map[rune]string)
for k, v := range codeCharMap {
charCodeMap[v] = k
}

for _, el := range chars {
encodedBytesBuffer.WriteString(charCodeMap[el])
}

return encodedBytesBuffer.String()
}

// decode the given string
func Decode(s string) string {
return s
encodedDataBytes := bytes.NewBuffer([]byte(s))
var encodingMappingLen int32
err := binary.Read(encodedDataBytes, binary.BigEndian, &encodingMappingLen)
if err != nil {
log.Fatalln("Error reading encoding mapping length:", err)
}

codeCharMap, err := deserializeCodeCharMap(encodedDataBytes, int32(encodingMappingLen))
if err != nil {
log.Fatalln(err.Error())
}

var sofar bytes.Buffer
var decodedStringBuilder strings.Builder

for encodedDataBytes.Len() > 0 {
k, err := encodedDataBytes.ReadByte()
if err != nil {
log.Fatalln(err.Error())
}
sofar.WriteByte(k)
if v, ok := codeCharMap[sofar.String()]; ok {
decodedStringBuilder.WriteRune(v)
sofar.Reset()
}
}
return decodedStringBuilder.String()
}
89 changes: 89 additions & 0 deletions compression/minpq.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package compression

import (
"fmt"
)

// Orderable is implemented by values that can be ranked against each other.
// Less reports whether the receiver ranks before rhs; MinPQ uses it to keep
// its elements in heap order.
type Orderable interface {
	Less(rhs Orderable) bool
}

// MinPQ is a minimum priority queue over Orderable values. Its elements are
// kept in arr, which the rise and sink methods treat as an array-backed
// binary heap: the children of index k sit at 2k+1 and 2k+2.
type MinPQ[T Orderable] struct {
	// arr is the heap storage; Min and DeleteMin read the minimum from arr[0].
	arr []T
}

// NewMinPQ returns an empty priority queue.
func NewMinPQ[T Orderable]() *MinPQ[T] {
	pq := new(MinPQ[T])
	pq.arr = []T{}
	return pq
}

// IsEmpty reports whether the queue holds no elements.
func (pq *MinPQ[T]) IsEmpty() bool {
	return pq.Size() == 0
}

// Size returns the number of elements currently in the queue.
func (pq *MinPQ[T]) Size() int {
	count := len(pq.arr)
	return count
}

// Min returns the smallest element without removing it. On an empty queue it
// returns the zero value of T together with an error.
func (pq *MinPQ[T]) Min() (T, error) {
	if len(pq.arr) > 0 {
		return pq.arr[0], nil
	}
	var none T
	return none, fmt.Errorf("priority queue is empty")
}

// swap exchanges the elements stored at indices a and b.
func (pq *MinPQ[T]) swap(a, b int) {
	held := pq.arr[a]
	pq.arr[a] = pq.arr[b]
	pq.arr[b] = held
}

// rise moves the element at index k towards the root: it is swapped with its
// parent for as long as it is Less than that parent, and stops at the root.
func (pq *MinPQ[T]) rise(k int) {
	for child := k; child > 0; {
		up := (child - 1) / 2
		if pq.arr[child].Less(pq.arr[up]) {
			pq.swap(child, up)
			child = up
			continue
		}
		// Parent is not larger: heap order holds from here upwards.
		return
	}
}

// sink moves the element at index k towards the leaves: it is swapped with
// the smaller of its children for as long as that child is Less than it.
func (pq *MinPQ[T]) sink(k int) {
	size := len(pq.arr)
	for {
		child := 2*k + 1
		if child >= size {
			// k has no children left.
			return
		}
		// Pick the right child only when it is strictly smaller than the left.
		if sibling := child + 1; sibling < size && pq.arr[sibling].Less(pq.arr[child]) {
			child = sibling
		}
		if !pq.arr[child].Less(pq.arr[k]) {
			return
		}
		pq.swap(k, child)
		k = child
	}
}

// Insert adds x to the queue: x is appended at the end of the heap and then
// raised to its place.
func (pq *MinPQ[T]) Insert(x T) {
	last := len(pq.arr)
	pq.arr = append(pq.arr, x)
	pq.rise(last)
}

// DeleteMin removes and returns the smallest element. On an empty queue it
// returns the zero value of T together with an error.
func (pq *MinPQ[T]) DeleteMin() (T, error) {
	var zero T
	last := len(pq.arr) - 1
	if last < 0 {
		return zero, fmt.Errorf("priority queue is empty")
	}

	top := pq.arr[0]
	// Move the last element to the root, then shrink the heap by one.
	pq.arr[0] = pq.arr[last]
	// Clear the vacated slot so the backing array no longer references the
	// moved element and cannot keep a removed value alive.
	pq.arr[last] = zero
	pq.arr = pq.arr[:last]

	pq.sink(0)

	return top, nil
}

0 comments on commit 3fdc383

Please sign in to comment.