-
Notifications
You must be signed in to change notification settings - Fork 841
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #107 from evanh/master
Add functions to allow storage of bitarrays
- Loading branch information
Showing
3 changed files
with
352 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,213 @@ | ||
/* | ||
Copyright 2014 Workiva, LLC | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package bitarray | ||
|
||
import ( | ||
"bytes" | ||
"encoding/binary" | ||
"errors" | ||
"io" | ||
) | ||
|
||
// Marshal takes a dense or sparse bit array and serializes it to a | ||
// byte slice. | ||
func Marshal(ba BitArray) ([]byte, error) { | ||
if eba, ok := ba.(*bitArray); ok { | ||
return eba.Serialize() | ||
} else if sba, ok := ba.(*sparseBitArray); ok { | ||
return sba.Serialize() | ||
} else { | ||
return nil, errors.New("not a valid BitArray") | ||
} | ||
} | ||
|
||
// Unmarshal takes a byte slice, of the same format produced by Marshal, | ||
// and returns a BitArray. | ||
func Unmarshal(input []byte) (BitArray, error) { | ||
if len(input) == 0 { | ||
return nil, errors.New("no data in input") | ||
} | ||
if input[0] == 'B' { | ||
ret := newBitArray(0) | ||
err := ret.Deserialize(input) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return ret, nil | ||
} else if input[0] == 'S' { | ||
ret := newSparseBitArray() | ||
err := ret.Deserialize(input) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return ret, nil | ||
} else { | ||
return nil, errors.New("unrecognized encoding") | ||
} | ||
} | ||
|
||
// Serialize converts the sparseBitArray to a byte slice | ||
func (ba *sparseBitArray) Serialize() ([]byte, error) { | ||
w := new(bytes.Buffer) | ||
|
||
var identifier uint8 = 'S' | ||
err := binary.Write(w, binary.LittleEndian, identifier) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
blocksLen := uint64(len(ba.blocks)) | ||
indexLen := uint64(len(ba.indices)) | ||
|
||
err = binary.Write(w, binary.LittleEndian, blocksLen) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
err = binary.Write(w, binary.LittleEndian, ba.blocks) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
err = binary.Write(w, binary.LittleEndian, indexLen) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
err = binary.Write(w, binary.LittleEndian, ba.indices) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return w.Bytes(), nil | ||
} | ||
|
||
// Deserialize takes the incoming byte slice, and populates the sparseBitArray | ||
// with data in the bytes. Note that this will overwrite any capacity | ||
// specified when creating the sparseBitArray. Also note that if an error | ||
// is returned, the sparseBitArray this is called on might be populated | ||
// with partial data. | ||
func (ret *sparseBitArray) Deserialize(incoming []byte) error { | ||
r := bytes.NewReader(incoming[1:]) // Discard identifier | ||
|
||
var intsToRead uint64 | ||
err := binary.Read(r, binary.LittleEndian, &intsToRead) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
var nextblock block | ||
for i := intsToRead; i > uint64(0); i-- { | ||
err = binary.Read(r, binary.LittleEndian, &nextblock) | ||
if err != nil { | ||
return err | ||
} | ||
ret.blocks = append(ret.blocks, nextblock) | ||
} | ||
|
||
err = binary.Read(r, binary.LittleEndian, &intsToRead) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
var nextuint uint64 | ||
for i := intsToRead; i > uint64(0); i-- { | ||
err = binary.Read(r, binary.LittleEndian, &nextuint) | ||
if err != nil { | ||
return err | ||
} | ||
ret.indices = append(ret.indices, nextuint) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// Serialize converts the bitArray to a byte slice. | ||
func (ba *bitArray) Serialize() ([]byte, error) { | ||
w := new(bytes.Buffer) | ||
|
||
var identifier uint8 = 'B' | ||
err := binary.Write(w, binary.LittleEndian, identifier) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
err = binary.Write(w, binary.LittleEndian, ba.lowest) | ||
if err != nil { | ||
return nil, err | ||
} | ||
err = binary.Write(w, binary.LittleEndian, ba.highest) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
var encodedanyset uint8 | ||
if ba.anyset { | ||
encodedanyset = 1 | ||
} else { | ||
encodedanyset = 0 | ||
} | ||
err = binary.Write(w, binary.LittleEndian, encodedanyset) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
err = binary.Write(w, binary.LittleEndian, ba.blocks) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return w.Bytes(), nil | ||
} | ||
|
||
// Deserialize takes the incoming byte slice, and populates the bitArray | ||
// with data in the bytes. Note that this will overwrite any capacity | ||
// specified when creating the bitArray. Also note that if an error is returned, | ||
// the bitArray this is called on might be populated with partial data. | ||
func (ret *bitArray) Deserialize(incoming []byte) error { | ||
r := bytes.NewReader(incoming[1:]) // Discard identifier | ||
|
||
err := binary.Read(r, binary.LittleEndian, &ret.lowest) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
err = binary.Read(r, binary.LittleEndian, &ret.highest) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
var encodedanyset uint8 | ||
err = binary.Read(r, binary.LittleEndian, &encodedanyset) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
// anyset defaults to false so we don't need an else statement | ||
if encodedanyset == 1 { | ||
ret.anyset = true | ||
} | ||
|
||
var nextblock block | ||
err = binary.Read(r, binary.LittleEndian, &nextblock) | ||
for err == nil { | ||
ret.blocks = append(ret.blocks, nextblock) | ||
err = binary.Read(r, binary.LittleEndian, &nextblock) | ||
} | ||
if err != io.EOF { | ||
return err | ||
} | ||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
/* | ||
Copyright 2014 Workiva, LLC | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package bitarray | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func TestSparseBitArraySerialization(t *testing.T) { | ||
numItems := uint64(1280) | ||
input := newSparseBitArray() | ||
|
||
for i := uint64(0); i < numItems; i++ { | ||
if i%3 == 0 { | ||
input.SetBit(i) | ||
} | ||
} | ||
|
||
outBytes, err := input.Serialize() | ||
assert.Equal(t, err, nil) | ||
|
||
assert.Equal(t, len(outBytes), 337) | ||
assert.True(t, outBytes[0] == 'S') | ||
expected := []byte{83, 20, 0, 0, 0, 0, 0, 0, 0, 73} | ||
assert.Equal(t, expected, outBytes[:10]) | ||
|
||
output := newSparseBitArray() | ||
err = output.Deserialize(outBytes) | ||
assert.Equal(t, err, nil) | ||
assert.True(t, input.Equals(output)) | ||
} | ||
|
||
func TestBitArraySerialization(t *testing.T) { | ||
numItems := uint64(1280) | ||
input := newBitArray(numItems) | ||
|
||
for i := uint64(0); i < numItems; i++ { | ||
if i%3 == 0 { | ||
input.SetBit(i) | ||
} | ||
} | ||
|
||
outBytes, err := input.Serialize() | ||
assert.Equal(t, err, nil) | ||
|
||
// 1280 bits = 20 blocks = 160 bytes, plus lowest and highest at | ||
// 128 bits = 16 bytes plus 1 byte for the anyset param and the identifer | ||
assert.Equal(t, len(outBytes), 178) | ||
|
||
expected := []byte{66, 0, 0, 0, 0, 0, 0, 0, 0, 254} | ||
assert.Equal(t, expected, outBytes[:10]) | ||
|
||
output := newBitArray(0) | ||
err = output.Deserialize(outBytes) | ||
assert.Equal(t, err, nil) | ||
assert.True(t, input.Equals(output)) | ||
} | ||
|
||
func TestBitArrayMarshalUnmarshal(t *testing.T) { | ||
numItems := uint64(1280) | ||
input := newBitArray(numItems) | ||
|
||
for i := uint64(0); i < numItems; i++ { | ||
if i%3 == 0 { | ||
input.SetBit(i) | ||
} | ||
} | ||
|
||
outputBytes, err := Marshal(input) | ||
assert.Equal(t, err, nil) | ||
assert.Equal(t, outputBytes[0], byte('B')) | ||
assert.Equal(t, len(outputBytes), 178) | ||
|
||
output, err := Unmarshal(outputBytes) | ||
assert.Equal(t, err, nil) | ||
|
||
assert.True(t, input.Equals(output)) | ||
} | ||
|
||
func TestSparseBitArrayMarshalUnmarshal(t *testing.T) { | ||
numItems := uint64(1280) | ||
input := newSparseBitArray() | ||
|
||
for i := uint64(0); i < numItems; i++ { | ||
if i%3 == 0 { | ||
input.SetBit(i) | ||
} | ||
} | ||
|
||
outputBytes, err := Marshal(input) | ||
assert.Equal(t, err, nil) | ||
assert.Equal(t, outputBytes[0], byte('S')) | ||
assert.Equal(t, len(outputBytes), 337) | ||
|
||
output, err := Unmarshal(outputBytes) | ||
assert.Equal(t, err, nil) | ||
|
||
assert.True(t, input.Equals(output)) | ||
} | ||
|
||
func TestUnmarshalErrors(t *testing.T) { | ||
numItems := uint64(1280) | ||
input := newBitArray(numItems) | ||
|
||
for i := uint64(0); i < numItems; i++ { | ||
if i%3 == 0 { | ||
input.SetBit(i) | ||
} | ||
} | ||
|
||
outputBytes, err := Marshal(input) | ||
|
||
outputBytes[0] = 'C' | ||
|
||
output, err := Unmarshal(outputBytes) | ||
assert.Error(t, err) | ||
assert.Equal(t, output, nil) | ||
|
||
output, err = Unmarshal(nil) | ||
assert.Error(t, err) | ||
assert.Equal(t, output, nil) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters