git/odb/pack: introduce v2 index format support

This commit is contained in:
Taylor Blau 2017-07-14 17:47:28 -06:00
parent 4982c440cb
commit 1b9d92b6ce
6 changed files with 269 additions and 0 deletions

@ -15,6 +15,33 @@ const (
VersionWidth = 4
// V2Width is the total width of the header in V2.
V2Width = MagicWidth + VersionWidth
// FanoutEntries is the number of entries in the fanout table.
FanoutEntries = 256
// FanoutEntryWidth is the width of each entry in the fanout table.
FanoutEntryWidth = 4
// FanoutWidth is the width of the entire fanout table.
FanoutWidth = FanoutEntries * FanoutEntryWidth
// OffsetV2Start is the location of the first object outside of the V2
// header.
OffsetV2Start = V2Width + FanoutWidth
// ObjectNameWidth is the width of a SHA1 object name.
ObjectNameWidth = 20
// ObjectCRCWidth is the width of the CRC accompanying each object in
// V2.
ObjectCRCWidth = 4
// ObjectSmallOffsetWidth is the width of the small offset encoded into
// each object.
ObjectSmallOffsetWidth = 4
// ObjectLargeOffsetWidth is the width of the optional large offset
// encoded into the small offset.
ObjectLargeOffsetWidth = 8
// ObjectEntryV2Width is the width of one non-contiguous object entry in
// V2.
ObjectEntryV2Width = ObjectNameWidth + ObjectCRCWidth + ObjectSmallOffsetWidth
)
var (
@ -66,6 +93,10 @@ func decodeIndexHeader(r io.ReaderAt) (IndexVersion, error) {
}
version := IndexVersion(binary.BigEndian.Uint32(vb))
switch version {
case V2:
return version, nil
}
return version, &UnsupportedVersionErr{uint32(version)}
}

@ -2,12 +2,44 @@ package pack
import (
"bytes"
"encoding/binary"
"io"
"testing"
"github.com/stretchr/testify/assert"
)
// TestDecodeIndexV2 feeds DecodeIndex an 8-byte V2 header followed by a full
// fanout table in which every entry is 3, and expects an index that reports
// version V2 and a count of 3.
func TestDecodeIndexV2(t *testing.T) {
	// Every fanout entry is the same big-endian value, so encode it once.
	entry := make([]byte, 4)
	binary.BigEndian.PutUint32(entry, uint32(3))

	raw := make([]byte, 0, V2Width+FanoutWidth)
	raw = append(raw, 0xff, 0x74, 0x4f, 0x63)
	raw = append(raw, 0x0, 0x0, 0x0, 0x2)
	for n := 0; n < FanoutEntries; n++ {
		raw = append(raw, entry...)
	}

	idx, err := DecodeIndex(bytes.NewReader(raw))

	assert.NoError(t, err)
	assert.Equal(t, V2, idx.version)
	assert.EqualValues(t, 3, idx.Count())
}
// TestDecodeIndexV2InvalidFanout feeds DecodeIndex a V2 header followed by a
// fanout table that is one byte too short, and expects ErrShortFanout together
// with a nil index.
func TestDecodeIndexV2InvalidFanout(t *testing.T) {
	// The capacity matches exactly what is appended below: the full header
	// plus a fanout table truncated by a single byte.
	buf := make([]byte, 0, V2Width+FanoutWidth-1)
	buf = append(buf, 0xff, 0x74, 0x4f, 0x63)
	buf = append(buf, 0x0, 0x0, 0x0, 0x2)
	buf = append(buf, make([]byte, FanoutWidth-1)...)

	idx, err := DecodeIndex(bytes.NewReader(buf))

	assert.Equal(t, ErrShortFanout, err)
	assert.Nil(t, idx)
}
func TestDecodeIndexUnsupportedVersion(t *testing.T) {
buf := make([]byte, 0, 4+4)
buf = append(buf, 0xff, 0x74, 0x4f, 0x63)

67
git/odb/pack/index_v2.go Normal file

@ -0,0 +1,67 @@
package pack
import (
"bytes"
"encoding/binary"
)
// V2 is the IndexVersion value that identifies the V2 index file format.
const V2 IndexVersion = 2
// v2Search implements the IndexVersion.Search method for V2 packfiles.
//
// It reads the 20-byte object name stored at position "at" of the name table
// and compares it against "name":
//
//   - If the names differ, it returns a nil entry and the (non-zero) result of
//     bytes.Compare(name, stored), telling the caller which direction to
//     search next.
//   - If the names match, it returns an entry holding the object's offset in
//     the packfile, and a comparison of 0.
//
// Any error from reading the index is returned as-is with a nil entry.
func v2Search(idx *Index, name []byte, at int64) (*IndexEntry, int, error) {
	var sha [20]byte
	if _, err := idx.readAt(sha[:], v2ShaOffset(at)); err != nil {
		return nil, 0, err
	}

	if cmp := bytes.Compare(name, sha[:]); cmp != 0 {
		return nil, cmp, nil
	}

	total := int64(idx.Count())

	var small [4]byte
	if _, err := idx.readAt(small[:], v2SmallOffsetOffset(at, total)); err != nil {
		return nil, 0, err
	}

	loc := uint64(binary.BigEndian.Uint32(small[:]))
	if loc&0x80000000 != 0 {
		// If the most significant bit (MSB) of the small offset is set,
		// the remaining 31 bits are NOT a pack offset and NOT a byte
		// position in the index file: per the pack index v2 format they
		// are an index into the table of 8-byte offsets.
		var large [8]byte

		// The 8-byte table starts immediately after the last small
		// offset, i.e. where small-offset entry number "total" would
		// be. From there, skip (index * 8) bytes to reach the entry.
		largeAt := v2SmallOffsetOffset(total, total) +
			int64(loc&0x7fffffff)*int64(len(large))

		if _, err := idx.readAt(large[:], largeAt); err != nil {
			return nil, 0, err
		}

		loc = binary.BigEndian.Uint64(large[:])
	}

	return &IndexEntry{PackOffset: loc}, 0, nil
}
// v2ShaOffset returns the offset of a SHA1 given at "at" in the V2 index file.
func v2ShaOffset(at int64) int64 {
// Skip the packfile index header and the L1 fanout table.
return OffsetV2Start +
// Skip until the desired name in the sorted names table.
(ObjectNameWidth * at)
}
// v2SmallOffsetOffset returns the offset of an object's small (4-byte) offset
// given by "at".
func v2SmallOffsetOffset(at, total int64) int64 {
// Skip the packfile index header and the L1 fanout table.
return OffsetV2Start +
// Skip the name table.
(ObjectNameWidth * total) +
// Skip the CRC table.
(ObjectCRCWidth * total) +
// Skip until the desired index in the small offsets table.
(ObjectSmallOffsetWidth * at)
}

@ -0,0 +1,106 @@
package pack
import (
"bytes"
"encoding/binary"
"testing"
"github.com/stretchr/testify/assert"
)
// Fixtures describing a small V2 index with three objects. The init function
// in this file fills in the fanout values and assembles the backing bytes.
var (
	// V2IndexHeader is the 8-byte header placed at the start of the fixture.
	V2IndexHeader = []byte{
		0xff, 0x74, 0x4f, 0x63,
		0x00, 0x00, 0x00, 0x02,
	}

	// V2IndexFanout is the fanout table of the fixture; its values are
	// assigned in init.
	V2IndexFanout = make([]uint32, FanoutEntries)

	// V2IndexNames holds three 20-byte object names in ascending order,
	// made of repeated 0x01, 0x02 and 0x03 bytes respectively.
	V2IndexNames = []byte{
		0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
		0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,

		0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
		0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,

		0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
		0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
	}

	// Views onto the first, second and third name in V2IndexNames.
	V2IndexSmallSha  = V2IndexNames[:20]
	V2IndexMediumSha = V2IndexNames[20:40]
	V2IndexLargeSha  = V2IndexNames[40:]

	// V2IndexCRCs holds one 4-byte CRC per object.
	V2IndexCRCs = []byte{
		0x00, 0x00, 0x00, 0x00,
		0x01, 0x01, 0x01, 0x01,
		0x02, 0x02, 0x02, 0x02,
	}

	// V2IndexOffsets holds three 4-byte offsets followed by a single 8-byte
	// offset. The first two objects store pack offsets 1 and 2 directly.
	// The third has its most significant bit set and carries 0x45c (1116)
	// in the remaining bits; 1116 is the byte position of the trailing
	// 8-byte value (3) inside the buffer assembled by init.
	//
	// NOTE(review): Git's pack index v2 format describes those remaining
	// bits as an index into the 8-byte offset table rather than a byte
	// position in the file. Confirm that this fixture matches the
	// interpretation v2Search is expected to implement.
	V2IndexOffsets = []byte{
		0x00, 0x00, 0x00, 0x01,
		0x00, 0x00, 0x00, 0x02,
		0x80, 0x00, 0x04, 0x5c,

		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03,
	}

	// V2Index is the index searched by the tests in this file. Its reader
	// is attached in init.
	V2Index = &Index{
		fanout:  V2IndexFanout,
		version: V2,
	}
)
// TestIndexV2SearchExact searches for the second fixture name at its own
// position (1) and expects a match whose pack offset is 2.
func TestIndexV2SearchExact(t *testing.T) {
	e, cmp, err := V2.Search(V2Index, V2IndexMediumSha, 1)

	assert.NoError(t, err)
	assert.Equal(t, 0, cmp)
	// Guard the dereference so a failed search is reported as a test
	// failure rather than a nil-pointer panic.
	if assert.NotNil(t, e) {
		assert.EqualValues(t, 2, e.PackOffset)
	}
}
// TestIndexV2SearchSmall searches for the second fixture name at position 0,
// where the first (smaller) name is stored. It expects no entry and a
// comparison of 1.
func TestIndexV2SearchSmall(t *testing.T) {
	entry, order, err := V2.Search(V2Index, V2IndexMediumSha, 0)

	assert.Equal(t, 1, order)
	assert.NoError(t, err)
	assert.Nil(t, entry)
}
// TestIndexV2SearchBig searches for the second fixture name at position 2,
// where the third (larger) name is stored. It expects no entry and a
// comparison of -1.
func TestIndexV2SearchBig(t *testing.T) {
	entry, order, err := V2.Search(V2Index, V2IndexMediumSha, 2)

	assert.Equal(t, -1, order)
	assert.NoError(t, err)
	assert.Nil(t, entry)
}
// TestIndexV2SearchExtendedOffset searches for the third fixture name at its
// own position (2). That object's small offset has its most significant bit
// set, so the pack offset must be resolved through the 8-byte offset, which
// holds 3.
func TestIndexV2SearchExtendedOffset(t *testing.T) {
	e, cmp, err := V2.Search(V2Index, V2IndexLargeSha, 2)

	assert.NoError(t, err)
	assert.Equal(t, 0, cmp)
	// Guard the dereference so a failed search is reported as a test
	// failure rather than a nil-pointer panic.
	if assert.NotNil(t, e) {
		assert.EqualValues(t, 3, e.PackOffset)
	}
}
// init populates the fanout fixture and assembles the raw bytes that back
// V2Index: header, fanout table, names, CRCs and offsets, in that order.
func init() {
	// The three fixture names begin with 0x01, 0x02 and 0x03, so the fanout
	// values are 0, 1, 2, 3 and then stay at 3 for every remaining entry.
	for i := range V2IndexFanout {
		count := uint32(i)
		if count > 3 {
			count = 3
		}
		V2IndexFanout[i] = count
	}

	raw := make([]byte, 0, OffsetV2Start+3*(ObjectEntryV2Width)+ObjectLargeOffsetWidth)
	raw = append(raw, V2IndexHeader...)

	// Serialize each fanout entry as a big-endian 4-byte value.
	var entry [FanoutEntryWidth]byte
	for _, count := range V2IndexFanout {
		binary.BigEndian.PutUint32(entry[:], count)
		raw = append(raw, entry[:]...)
	}

	raw = append(raw, V2IndexNames...)
	raw = append(raw, V2IndexCRCs...)
	raw = append(raw, V2IndexOffsets...)

	V2Index.f = bytes.NewReader(raw)
}

@ -1,6 +1,7 @@
package pack
import (
"errors"
"fmt"
)
@ -16,5 +17,33 @@ const (
// Width reports the header width, in bytes, for index version "v".
//
// It panics when the width of the given version is not known.
func (v IndexVersion) Width() int64 {
	if v == V2 {
		return V2Width
	}

	panic(fmt.Sprintf("git/odb/pack: width unknown for pack version %d", v))
}
// ErrIndexOutOfBounds is the error Search returns when the requested lookup
// position "at" falls outside of the index.
var ErrIndexOutOfBounds = errors.New("git/odb/pack: index is out of bounds")
// Search searches index "idx" for an object given by "name" at location "at".
//
// It will return the object if it was found, or a comparison determining
// whether to search above or below next.
//
// Otherwise, it will return an error: ErrIndexOutOfBounds when "at" is not a
// valid position in the index (valid positions are 0 through idx.Count()-1),
// an *UnsupportedVersionErr when "v" is not a known version, or whatever
// error the version-specific search encountered.
func (v IndexVersion) Search(idx *Index, name []byte, at int64) (*IndexEntry, int, error) {
	// Positions are zero-based, so "at == Count()" is already one past the
	// last object; negative positions are never valid either.
	if at < 0 || at >= int64(idx.Count()) {
		return nil, 0, ErrIndexOutOfBounds
	}

	switch v {
	case V2:
		return v2Search(idx, name, at)
	}
	return nil, 0, &UnsupportedVersionErr{Got: uint32(v)}
}

@ -6,6 +6,10 @@ import (
"github.com/stretchr/testify/assert"
)
func TestIndexVersionWidthV2(t *testing.T) {
assert.EqualValues(t, 8, V2.Width())
}
func TestIndexVersionWidthPanicsOnUnknownVersion(t *testing.T) {
v := IndexVersion(5)