Skip to content

Commit

Permalink
Merge pull request #8 from mrsndmn/iterator_performance_improvements
Browse files Browse the repository at this point in the history
Iterator performance improvements
  • Loading branch information
alldroll authored Nov 25, 2020
2 parents 8303230 + 7effe82 commit b4ddf83
Show file tree
Hide file tree
Showing 5 changed files with 210 additions and 36 deletions.
10 changes: 9 additions & 1 deletion cdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,18 @@ type Reader interface {
type Iterator interface {
// Next moves the iterator to the next record. Returns true on success otherwise returns false.
Next() (bool, error)
// Record returns the current record
// Record returns the current record. This method is lazy: the record's data is read only when it is required.
Record() Record
// HasNext tells if the iterator can be moved to the next record.
HasNext() bool
// Key returns the key's []byte slice. It is usually easier to use and
// faster than iterator.Record().Key(),
// because it doesn't require an allocation for a record copy.
Key() ([]byte, error)
// Value returns the value's []byte slice. It is usually easier to use and
// faster than iterator.Record().Value(),
// because it doesn't require an allocation for a record copy.
Value() ([]byte, error)
}

// Record provides API for reading record key, value.
Expand Down
2 changes: 1 addition & 1 deletion cdb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ func (suite *CDBTestSuite) TestShouldReturnNilOnNonExistingKeys() {

for _, rec := range suite.testRecords {
value, err := reader.Get(rec.key)
suite.Nilf(err, "Can't get from cdb key: %s", string(rec.key))
suite.EqualError(err, ErrEntryNotFound.Error(), "Can't get from cdb key: %s", string(rec.key))
suite.Nil(value, "Value must be nil for non existing keys")

exists, err := reader.Has(rec.key)
Expand Down
67 changes: 51 additions & 16 deletions iterator.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package cdb

import (
"errors"
"io"
)

// ErrEmptyCDB is returned when an iterator is requested for a cdb that has no records.
var ErrEmptyCDB = errors.New("cdb is empty")

// iterator implements Iterator interface
type iterator struct {
position uint32
Expand All @@ -28,10 +31,21 @@ func (s *sectionReaderFactory) create() (io.Reader, uint32) {
return io.NewSectionReader(s.reader, int64(s.position), int64(s.size)), s.size
}

// readSection reads exactly size bytes from readerAt starting at position and
// returns them in a freshly allocated slice.
//
// An io.EOF reported together with a complete read is treated as success,
// since io.ReaderAt implementations may return EOF when the read ends exactly
// at the end of the input. Any other error yields a nil slice and that error.
func readSection(readerAt io.ReaderAt, position int64, size uint32) ([]byte, error) {
	buf := make([]byte, size)
	n, err := readerAt.ReadAt(buf, position)

	switch {
	case err == nil:
		return buf, nil
	case err == io.EOF && n == len(buf):
		// The whole section was read; EOF only signals the end of input.
		return buf, nil
	default:
		return nil, err
	}
}

// Next moves the iterator to the next record. Returns true on success otherwise returns false.
func (i *iterator) Next() (bool, error) {
i.record = nil

if !i.HasNext() {
return false, nil
}
Expand All @@ -42,31 +56,52 @@ func (i *iterator) Next() (bool, error) {
return false, err
}

i.record = &record{
keySectionFactory: &sectionReaderFactory{
reader: i.cdbReader.reader,
position: i.position + 8,
size: keySize,
},
valueSectionFactory: &sectionReaderFactory{
reader: i.cdbReader.reader,
position: i.position + 8 + keySize,
size: valSize,
},
}
i.record.keySectionFactory.position = i.position + 8
i.record.keySectionFactory.size = keySize

i.record.valueSectionFactory.position = i.position + 8 + keySize
i.record.valueSectionFactory.size = valSize

i.position += keySize + valSize + 8

return true, nil
}

// Record returns the current record
// Key reads and returns the key bytes of the current record. It is usually
// easier to use and faster than iterator.Record().Key(), because it reads the
// bytes directly and does not require allocating a SectionReader.
func (i *iterator) Key() ([]byte, error) {
	section := i.record.keySectionFactory
	offset := int64(section.position)
	return readSection(section.reader, offset, section.size)
}

// Value reads and returns the value bytes of the current record. It is usually
// easier to use and faster than iterator.Record().Value(), because it reads
// the bytes directly and does not require allocating a SectionReader.
func (i *iterator) Value() ([]byte, error) {
	section := i.record.valueSectionFactory
	offset := int64(section.position)
	return readSection(section.reader, offset, section.size)
}

// Record returns copy of current record
func (i *iterator) Record() Record {
return i.record
return &record{
keySectionFactory: &sectionReaderFactory{
reader: i.record.keySectionFactory.reader,
position: i.record.keySectionFactory.position,
size: i.record.keySectionFactory.size,
},
valueSectionFactory: &sectionReaderFactory{
reader: i.record.valueSectionFactory.reader,
position: i.record.valueSectionFactory.position,
size: i.record.valueSectionFactory.size,
},
}
}

// HasNext reports whether the iterator can be moved to the next record: the
// cdb must be non-empty and the current position must lie before the
// reader's endPos.
func (i *iterator) HasNext() bool {
	return !i.cdbReader.IsEmpty() && i.position < i.cdbReader.endPos
}

Expand Down
124 changes: 111 additions & 13 deletions iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,37 @@ import (
"testing"
)

func (suite *CDBTestSuite) getCDBIterator() Iterator {
func (suite *CDBTestSuite) getCDBIterator() (Iterator, error) {
reader := suite.getCDBReader()
iterator, err := reader.Iterator()
return reader.Iterator()
}

func (suite *CDBTestSuite) mustGetCDBIterator() Iterator {
iterator, err := suite.getCDBIterator()
suite.Require().Nilf(err, "Iterator creation error: %#v", err)
return iterator
}

func (suite *CDBTestSuite) TestIteratorOnEmptyDataSet() {
suite.writeEmptyCDB()

iterator := suite.getCDBIterator()
iterator, err := suite.getCDBIterator()

suite.False(iterator.HasNext(), "Iterator must return false on HasNext")
suite.EqualError(err, ErrEmptyCDB.Error())

suite.Nilf(iterator, "Iterator must return false on HasNext")
}

ok, err := iterator.Next()
suite.False(ok, "Next should returns false")
suite.Nilf(err, "Got unexpected error %v", err)
// EqualKeyValue asserts that the iterator's current record matches testRec,
// both through iter.Record() and through the direct iter.Key() / iter.Value()
// accessors.
func (suite *CDBTestSuite) EqualKeyValue(iter Iterator, testRec testCDBRecord) {
	suite.EqualRecords(iter.Record(), testRec)

	gotKey, keyErr := iter.Key()
	suite.Nilf(keyErr, "Cant get key: %#v", keyErr)
	gotValue, valueErr := iter.Value()
	suite.Nilf(valueErr, "Cant get value: %#v", valueErr)

	suite.Equal(testRec.key, gotKey, "Keys must be equal. Got: %s, Expected: %s", gotKey, testRec.key)
	suite.Equal(testRec.val, gotValue, "Values must be equal. Got: %s, Expected: %s", gotValue, testRec.val)
}

func (suite *CDBTestSuite) EqualRecords(record Record, testRec testCDBRecord) {
Expand All @@ -46,11 +60,10 @@ func (suite *CDBTestSuite) EqualRecords(record Record, testRec testCDBRecord) {
func (suite *CDBTestSuite) TestIterator() {
suite.fillTestCDB()

iterator := suite.getCDBIterator()
iterator := suite.mustGetCDBIterator()

for i, testRec := range suite.testRecords {
record := iterator.Record()
suite.EqualRecords(record, testRec)
suite.EqualKeyValue(iterator, testRec)

ok, err := iterator.Next()
suite.Nilf(err, "Error on interator.Next: %#v", err)
Expand All @@ -66,6 +79,20 @@ func (suite *CDBTestSuite) TestIterator() {
suite.False(ok, "HasNext should returns false if has no more records")
}

// TestIteratorNext checks that a record obtained from the iterator before
// Next is not affected by advancing the iterator: the first record must still
// match the first test record, the second must match the second test record,
// and the two must differ.
func (suite *CDBTestSuite) TestIteratorNext() {
	suite.fillTestCDB()
	iterator := suite.mustGetCDBIterator()

	record1 := iterator.Record()
	suite.EqualRecords(record1, suite.testRecords[0])

	// Do not silently drop the outcome of Next: if the iterator failed to
	// advance, the comparisons below would be meaningless.
	ok, err := iterator.Next()
	suite.Require().Nilf(err, "Error on iterator.Next: %#v", err)
	suite.Require().True(ok, "Next should return true while records remain")

	record2 := iterator.Record()
	suite.EqualRecords(record2, suite.testRecords[1])

	suite.NotEqual(record1, record2)
}

func (suite *CDBTestSuite) TestIteratorAt() {
suite.fillTestCDB()
reader := suite.getCDBReader()
Expand All @@ -74,12 +101,11 @@ func (suite *CDBTestSuite) TestIteratorAt() {
iterator, err := reader.IteratorAt(testRec.key)
suite.Nilf(err, "Unexpected error for reader.IteratorAt: %#v", err)

record := iterator.Record()
suite.EqualRecords(record, testRec)
suite.EqualKeyValue(iterator, testRec)
}
}

func BenchmarkReaderIteratorAt(b *testing.B) {
func BenchmarkIteratorAt(b *testing.B) {

n := 1000
f, _ := os.Create("test.cdb")
Expand All @@ -103,3 +129,75 @@ func BenchmarkReaderIteratorAt(b *testing.B) {
reader.IteratorAt(keys[j%n])
}
}

// BenchmarkIteratorNext measures the cost of advancing the iterator with Next.
func BenchmarkIteratorNext(b *testing.B) {
	iter, f := getTestCDBIterator(b)
	// Remove the file first, then close it, matching LIFO defer order.
	defer func() {
		os.Remove(f.Name())
		f.Close()
	}()

	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		iter.Next()
	}
	b.StopTimer()
}

// BenchmarkIteratorRecordKey measures reading the current key through
// iter.Record().Key(), i.e. via a record copy and a key reader.
func BenchmarkIteratorRecordKey(b *testing.B) {
	iter, f := getTestCDBIterator(b)
	// Remove the file first, then close it, matching LIFO defer order.
	defer func() {
		os.Remove(f.Name())
		f.Close()
	}()

	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		keyReader, keySize := iter.Record().Key()
		buf := make([]byte, keySize)
		keyReader.Read(buf)
	}
	b.StopTimer()
}

// BenchmarkIteratorKey measures reading the current key through the direct
// iter.Key() accessor.
func BenchmarkIteratorKey(b *testing.B) {
	iter, f := getTestCDBIterator(b)
	// Remove the file first, then close it, matching LIFO defer order.
	defer func() {
		os.Remove(f.Name())
		f.Close()
	}()

	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		iter.Key()
	}
	b.StopTimer()
}

// getTestCDBIterator creates the file "test.cdb", writes b.N records into it
// (the key and the value of record i are both the decimal representation of
// i), and returns an iterator over the resulting cdb together with the
// backing file. The caller is responsible for closing and removing the file.
//
// Setup failures are reported through b.Fatal instead of panicking, and the
// file is closed and removed before the benchmark is aborted.
func getTestCDBIterator(b *testing.B) (Iterator, *os.File) {
	b.Helper()

	f, err := os.Create("test.cdb")
	if err != nil {
		b.Fatal(err)
	}

	// abort cleans up the file before failing, because the caller's deferred
	// cleanup is only registered after this helper returns.
	abort := func(err error) {
		b.Helper()
		f.Close()
		os.Remove(f.Name())
		b.Fatal(err)
	}

	handle := New()
	writer, err := handle.GetWriter(f)
	if err != nil {
		abort(err)
	}

	for i := 0; i < b.N; i++ {
		kv := []byte(strconv.Itoa(i))
		// NOTE(review): the results of Put and Close are not checked, as in
		// the original — confirm whether they return errors worth handling.
		writer.Put(kv, kv)
	}

	writer.Close()
	reader, err := handle.GetReader(f)
	if err != nil {
		abort(err)
	}

	iter, err := reader.Iterator()
	if err != nil {
		abort(err)
	}

	return iter, f
}
43 changes: 38 additions & 5 deletions reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ import (
"io"
)

// ErrEntryNotFound is returned by Get when the cdb has no entry for the given key.
var ErrEntryNotFound = errors.New("cdb entry not found")

// hashTableRef is a pointer that state a position and a length of the hash table
// position is the starting byte position of the hash table.
// The length is the number of slots in the hash table.
Expand Down Expand Up @@ -62,13 +65,21 @@ func (r *readerImpl) initialize() error {
return nil
}

// IsEmpty reports whether the cdb has no records.
// It does so by checking whether the reader's endPos is zero.
func (r *readerImpl) IsEmpty() bool {
return r.endPos == 0
}

// Get returns the first value associated with the given key
func (r *readerImpl) Get(key []byte) ([]byte, error) {
valueSection, err := r.findEntry(key)

if valueSection == nil || err != nil {
if err != nil {
return nil, err
}
if valueSection == nil {
return nil, ErrEntryNotFound
}

value := make([]byte, valueSection.size)

Expand All @@ -88,7 +99,11 @@ func (r *readerImpl) Has(key []byte) (bool, error) {

// Iterator returns new Iterator object that points on first record
func (r *readerImpl) Iterator() (Iterator, error) {
iterator := r.newIterator(tablesRefsSize, nil, nil)
iterator, err := r.newIterator(tablesRefsSize, nil, nil)

if err != nil {
return nil, err
}

if _, err := iterator.Next(); err != nil {
return nil, err
Expand All @@ -112,7 +127,7 @@ func (r *readerImpl) IteratorAt(key []byte) (Iterator, error) {
size: uint32(len(key)),
},
valueSection,
), nil
)
}

// Size returns the size of the dataset
Expand Down Expand Up @@ -224,13 +239,31 @@ func (r *readerImpl) readPair(pos uint32, a, b *uint32) error {
}

// newIterator returns new instance of Iterator object
func (r *readerImpl) newIterator(position uint32, keySectionFactory, valueSectionFactory *sectionReaderFactory) Iterator {
return &iterator{
// newIterator builds an Iterator that starts at position and whose current
// record uses the given key and value section factories. A nil factory is
// replaced by an empty one bound to the cdb's underlying reader. It returns
// ErrEmptyCDB when the cdb has no records.
func (r *readerImpl) newIterator(position uint32, keySectionFactory, valueSectionFactory *sectionReaderFactory) (Iterator, error) {
	if r.IsEmpty() {
		return nil, ErrEmptyCDB
	}

	current := &record{
		keySectionFactory:   keySectionFactory,
		valueSectionFactory: valueSectionFactory,
	}
	// The iterator always holds non-nil factories, so fill in any that the
	// caller did not provide.
	if current.keySectionFactory == nil {
		current.keySectionFactory = &sectionReaderFactory{reader: r.reader}
	}
	if current.valueSectionFactory == nil {
		current.valueSectionFactory = &sectionReaderFactory{reader: r.reader}
	}

	return &iterator{
		position:  position,
		cdbReader: r,
		record:    current,
	}, nil
}

0 comments on commit b4ddf83

Please sign in to comment.