Skip to content

Commit

Permalink
Merge pull request #107 from evanh/master
Browse files Browse the repository at this point in the history
Add functions to allow storage of bitarrays
  • Loading branch information
dustinhiatt-wf committed Aug 4, 2015
2 parents 0d929fe + 054d629 commit a430e78
Show file tree
Hide file tree
Showing 3 changed files with 352 additions and 0 deletions.
213 changes: 213 additions & 0 deletions bitarray/encoding.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
/*
Copyright 2014 Workiva, LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package bitarray

import (
"bytes"
"encoding/binary"
"errors"
"io"
)

// Marshal serializes ba into a byte slice. Only the dense (*bitArray) and
// sparse (*sparseBitArray) implementations from this package are supported;
// any other value yields an error.
func Marshal(ba BitArray) ([]byte, error) {
	switch typed := ba.(type) {
	case *bitArray:
		return typed.Serialize()
	case *sparseBitArray:
		return typed.Serialize()
	default:
		return nil, errors.New("not a valid BitArray")
	}
}

// Unmarshal decodes a byte slice in the format produced by Marshal and
// returns the resulting BitArray. The first byte selects the concrete
// implementation: 'B' for a dense bit array, 'S' for a sparse one. Empty
// input or any other leading byte results in an error.
func Unmarshal(input []byte) (BitArray, error) {
	if len(input) == 0 {
		return nil, errors.New("no data in input")
	}

	switch input[0] {
	case 'B':
		dense := newBitArray(0)
		if err := dense.Deserialize(input); err != nil {
			return nil, err
		}
		return dense, nil
	case 'S':
		sparse := newSparseBitArray()
		if err := sparse.Deserialize(input); err != nil {
			return nil, err
		}
		return sparse, nil
	default:
		return nil, errors.New("unrecognized encoding")
	}
}

// Serialize encodes the sparseBitArray as a little-endian byte slice laid
// out as: the identifier byte 'S', the number of blocks as a uint64, the
// blocks themselves, the number of indices as a uint64, and finally the
// indices.
func (ba *sparseBitArray) Serialize() ([]byte, error) {
	buf := new(bytes.Buffer)

	// The fields are written strictly in this order; Deserialize reads
	// them back in the same sequence.
	fields := []interface{}{
		uint8('S'),
		uint64(len(ba.blocks)),
		ba.blocks,
		uint64(len(ba.indices)),
		ba.indices,
	}
	for _, field := range fields {
		if err := binary.Write(buf, binary.LittleEndian, field); err != nil {
			return nil, err
		}
	}
	return buf.Bytes(), nil
}

// Deserialize populates the sparseBitArray from incoming, which must be in
// the format produced by Serialize: an identifier byte (skipped here, not
// validated), a little-endian uint64 block count followed by that many
// blocks, then a little-endian uint64 index count followed by that many
// indices.
//
// Any blocks and indices already held by the receiver are discarded before
// decoding, so the result reflects only the incoming data. An error is
// returned if incoming is empty or ends before all announced values have
// been read. Note that if an error is returned, the sparseBitArray this is
// called on might be populated with partial data.
func (ret *sparseBitArray) Deserialize(incoming []byte) error {
	// Slicing off the identifier below would panic on an empty slice.
	if len(incoming) == 0 {
		return errors.New("no data in input")
	}
	r := bytes.NewReader(incoming[1:]) // Discard identifier

	// Start from a clean slate so decoded values replace, rather than get
	// appended to, whatever the receiver held before.
	ret.blocks = nil
	ret.indices = nil

	var intsToRead uint64
	if err := binary.Read(r, binary.LittleEndian, &intsToRead); err != nil {
		return err
	}

	var nextblock block
	for i := uint64(0); i < intsToRead; i++ {
		if err := binary.Read(r, binary.LittleEndian, &nextblock); err != nil {
			return err
		}
		ret.blocks = append(ret.blocks, nextblock)
	}

	if err := binary.Read(r, binary.LittleEndian, &intsToRead); err != nil {
		return err
	}

	var nextuint uint64
	for i := uint64(0); i < intsToRead; i++ {
		if err := binary.Read(r, binary.LittleEndian, &nextuint); err != nil {
			return err
		}
		ret.indices = append(ret.indices, nextuint)
	}

	return nil
}

// Serialize encodes the bitArray as a little-endian byte slice laid out as:
// the identifier byte 'B', the lowest and highest fields, the anyset flag
// as a single byte (1 when set, 0 otherwise), and then every block. No
// block count is written; the blocks simply run to the end of the output.
func (ba *bitArray) Serialize() ([]byte, error) {
	var anysetFlag uint8
	if ba.anyset {
		anysetFlag = 1
	}

	out := new(bytes.Buffer)
	// Values are emitted strictly in this order.
	for _, value := range []interface{}{
		uint8('B'),
		ba.lowest,
		ba.highest,
		anysetFlag,
		ba.blocks,
	} {
		if err := binary.Write(out, binary.LittleEndian, value); err != nil {
			return nil, err
		}
	}
	return out.Bytes(), nil
}

// Deserialize populates the bitArray from incoming, which must be in the
// format produced by Serialize: an identifier byte (skipped here, not
// validated), the little-endian lowest and highest fields, one byte for the
// anyset flag, and then blocks until the end of the input.
//
// Existing blocks on the receiver are discarded and anyset is always
// assigned from the encoded flag, so the result reflects only the incoming
// data regardless of how the receiver was created. An error is returned if
// incoming is empty, if a header field cannot be read, or if the input ends
// in the middle of a block. Note that if an error is returned, the bitArray
// this is called on might be populated with partial data.
func (ret *bitArray) Deserialize(incoming []byte) error {
	// Slicing off the identifier below would panic on an empty slice.
	if len(incoming) == 0 {
		return errors.New("no data in input")
	}
	r := bytes.NewReader(incoming[1:]) // Discard identifier

	if err := binary.Read(r, binary.LittleEndian, &ret.lowest); err != nil {
		return err
	}

	if err := binary.Read(r, binary.LittleEndian, &ret.highest); err != nil {
		return err
	}

	var encodedanyset uint8
	if err := binary.Read(r, binary.LittleEndian, &encodedanyset); err != nil {
		return err
	}
	// Assign unconditionally so a receiver that previously had anyset set
	// is cleared when the encoded flag is 0.
	ret.anyset = encodedanyset == 1

	// Replace, rather than append to, any blocks the receiver already had.
	ret.blocks = nil

	var nextblock block
	for {
		err := binary.Read(r, binary.LittleEndian, &nextblock)
		if err == io.EOF {
			// Clean end of input on a block boundary.
			return nil
		}
		if err != nil {
			return err
		}
		ret.blocks = append(ret.blocks, nextblock)
	}
}
138 changes: 138 additions & 0 deletions bitarray/encoding_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/*
Copyright 2014 Workiva, LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package bitarray

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestSparseBitArraySerialization sets every third bit of a sparse bit
// array, checks the size and leading bytes of its serialized form, and
// verifies that deserializing those bytes yields an equal bit array.
func TestSparseBitArraySerialization(t *testing.T) {
	numItems := uint64(1280)
	input := newSparseBitArray()

	for i := uint64(0); i < numItems; i++ {
		if i%3 == 0 {
			input.SetBit(i)
		}
	}

	outBytes, err := input.Serialize()
	if !assert.NoError(t, err) {
		return
	}

	// 1 identifier byte + 8 bytes of block count + 20 blocks (160 bytes)
	// + 8 bytes of index count + 20 indices (160 bytes).
	// Bail out on a length mismatch so the slicing below cannot panic.
	if !assert.Equal(t, 337, len(outBytes)) {
		return
	}
	assert.Equal(t, byte('S'), outBytes[0])
	expected := []byte{83, 20, 0, 0, 0, 0, 0, 0, 0, 73}
	assert.Equal(t, expected, outBytes[:10])

	output := newSparseBitArray()
	err = output.Deserialize(outBytes)
	assert.NoError(t, err)
	assert.True(t, input.Equals(output))
}

// TestBitArraySerialization sets every third bit of a dense bit array,
// checks the size and leading bytes of its serialized form, and verifies
// that deserializing those bytes yields an equal bit array.
func TestBitArraySerialization(t *testing.T) {
	numItems := uint64(1280)
	input := newBitArray(numItems)

	for i := uint64(0); i < numItems; i++ {
		if i%3 == 0 {
			input.SetBit(i)
		}
	}

	outBytes, err := input.Serialize()
	if !assert.NoError(t, err) {
		return
	}

	// 1280 bits = 20 blocks = 160 bytes, plus lowest and highest at
	// 128 bits = 16 bytes, plus 1 byte for the anyset param and 1 byte
	// for the identifier.
	// Bail out on a length mismatch so the slicing below cannot panic.
	if !assert.Equal(t, 178, len(outBytes)) {
		return
	}

	expected := []byte{66, 0, 0, 0, 0, 0, 0, 0, 0, 254}
	assert.Equal(t, expected, outBytes[:10])

	output := newBitArray(0)
	err = output.Deserialize(outBytes)
	assert.NoError(t, err)
	assert.True(t, input.Equals(output))
}

// TestBitArrayMarshalUnmarshal round-trips a dense bit array through
// Marshal and Unmarshal and checks that the result equals the input.
func TestBitArrayMarshalUnmarshal(t *testing.T) {
	numItems := uint64(1280)
	input := newBitArray(numItems)

	for i := uint64(0); i < numItems; i++ {
		if i%3 == 0 {
			input.SetBit(i)
		}
	}

	outputBytes, err := Marshal(input)
	// Stop early on failure so indexing outputBytes below cannot panic.
	if !assert.NoError(t, err) {
		return
	}
	if !assert.Equal(t, 178, len(outputBytes)) {
		return
	}
	assert.Equal(t, byte('B'), outputBytes[0])

	output, err := Unmarshal(outputBytes)
	if !assert.NoError(t, err) {
		return
	}

	assert.True(t, input.Equals(output))
}

// TestSparseBitArrayMarshalUnmarshal round-trips a sparse bit array through
// Marshal and Unmarshal and checks that the result equals the input.
func TestSparseBitArrayMarshalUnmarshal(t *testing.T) {
	numItems := uint64(1280)
	input := newSparseBitArray()

	for i := uint64(0); i < numItems; i++ {
		if i%3 == 0 {
			input.SetBit(i)
		}
	}

	outputBytes, err := Marshal(input)
	// Stop early on failure so indexing outputBytes below cannot panic.
	if !assert.NoError(t, err) {
		return
	}
	if !assert.Equal(t, 337, len(outputBytes)) {
		return
	}
	assert.Equal(t, byte('S'), outputBytes[0])

	output, err := Unmarshal(outputBytes)
	if !assert.NoError(t, err) {
		return
	}

	assert.True(t, input.Equals(output))
}

// TestUnmarshalErrors checks that Unmarshal returns an error and a nil
// BitArray both for data with an unknown identifier byte and for nil input.
func TestUnmarshalErrors(t *testing.T) {
	numItems := uint64(1280)
	input := newBitArray(numItems)

	for i := uint64(0); i < numItems; i++ {
		if i%3 == 0 {
			input.SetBit(i)
		}
	}

	outputBytes, err := Marshal(input)
	// Marshal must succeed before its output can be corrupted below.
	if !assert.NoError(t, err) {
		return
	}

	// Overwrite the identifier with a byte that is neither 'B' nor 'S'.
	outputBytes[0] = 'C'

	output, err := Unmarshal(outputBytes)
	assert.Error(t, err)
	assert.Nil(t, output)

	output, err = Unmarshal(nil)
	assert.Error(t, err)
	assert.Nil(t, output)
}
1 change: 1 addition & 0 deletions bitarray/sparse_bitarray.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ type sparseBitArray struct {
func (sba *sparseBitArray) SetBit(k uint64) error {
index, position := getIndexAndRemainder(k)
i, inserted := sba.indices.insert(index)

if inserted {
sba.blocks.insert(i)
}
Expand Down

0 comments on commit a430e78

Please sign in to comment.