From 1b9d92b6ceb73b207b67f6926c8e66decebd229b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 14 Jul 2017 17:47:28 -0600 Subject: [PATCH] git/odb/pack: introduce v2 index format support --- git/odb/pack/index_decode.go | 31 +++++++++ git/odb/pack/index_decode_test.go | 32 +++++++++ git/odb/pack/index_v2.go | 67 ++++++++++++++++++ git/odb/pack/index_v2_test.go | 106 +++++++++++++++++++++++++++++ git/odb/pack/index_version.go | 29 ++++++++ git/odb/pack/index_version_test.go | 4 ++ 6 files changed, 269 insertions(+) create mode 100644 git/odb/pack/index_v2.go create mode 100644 git/odb/pack/index_v2_test.go diff --git a/git/odb/pack/index_decode.go b/git/odb/pack/index_decode.go index cb09412e..f8b5f336 100644 --- a/git/odb/pack/index_decode.go +++ b/git/odb/pack/index_decode.go @@ -15,6 +15,33 @@ const ( VersionWidth = 4 // V2Width is the total width of the header in V2. V2Width = MagicWidth + VersionWidth + + // FanoutEntries is the number of entries in the fanout table. + FanoutEntries = 256 + // FanoutEntryWidth is the width of each entry in the fanout table. + FanoutEntryWidth = 4 + // FanoutWidth is the width of the entire fanout table. + FanoutWidth = FanoutEntries * FanoutEntryWidth + + // OffsetV2Start is the location of the first object outside of the V2 + // header. + OffsetV2Start = V2Width + FanoutWidth + + // ObjectNameWidth is the width of a SHA1 object name. + ObjectNameWidth = 20 + // ObjectCRCWidth is the width of the CRC accompanying each object in + // V2. + ObjectCRCWidth = 4 + // ObjectSmallOffsetWidth is the width of the small offset encoded into + // each object. + ObjectSmallOffsetWidth = 4 + // ObjectLargeOffsetWidth is the width of the optional large offset + // encoded into the small offset. + ObjectLargeOffsetWidth = 8 + + // ObjectEntryV2Width is the width of one non-contiguous object entry in + // V2. 
+	ObjectEntryV2Width = ObjectNameWidth + ObjectCRCWidth + ObjectSmallOffsetWidth
 )
 
 var (
@@ -66,6 +93,10 @@ func decodeIndexHeader(r io.ReaderAt) (IndexVersion, error) {
 	}
 
 	version := IndexVersion(binary.BigEndian.Uint32(vb))
+	switch version {
+	case V2: // The only version accepted here; anything else falls through to the error below.
+		return version, nil
+	}
 	return version, &UnsupportedVersionErr{uint32(version)}
 }
 
diff --git a/git/odb/pack/index_decode_test.go b/git/odb/pack/index_decode_test.go
index 8f441a0f..2a078326 100644
--- a/git/odb/pack/index_decode_test.go
+++ b/git/odb/pack/index_decode_test.go
@@ -2,12 +2,44 @@ package pack
 
 import (
 	"bytes"
+	"encoding/binary"
 	"io"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
 )
 
+func TestDecodeIndexV2(t *testing.T) {
+	buf := make([]byte, 0, V2Width+FanoutWidth)
+	buf = append(buf, 0xff, 0x74, 0x4f, 0x63) // First four header bytes, same as V2IndexHeader.
+	buf = append(buf, 0x0, 0x0, 0x0, 0x2) // Version field: big-endian 2.
+	for i := 0; i < FanoutEntries; i++ {
+		x := make([]byte, 4)
+
+		binary.BigEndian.PutUint32(x, uint32(3)) // Every fanout entry holds 3, matching the Count() assertion.
+
+		buf = append(buf, x...)
+	}
+
+	idx, err := DecodeIndex(bytes.NewReader(buf))
+
+	assert.NoError(t, err)
+	assert.Equal(t, V2, idx.version)
+	assert.EqualValues(t, 3, idx.Count())
+}
+
+func TestDecodeIndexV2InvalidFanout(t *testing.T) {
+	buf := make([]byte, 0, V2Width+FanoutWidth-FanoutEntryWidth)
+	buf = append(buf, 0xff, 0x74, 0x4f, 0x63)
+	buf = append(buf, 0x0, 0x0, 0x0, 0x2)
+	buf = append(buf, make([]byte, FanoutWidth-1)...) // One byte short of a complete fanout table.
+
+	idx, err := DecodeIndex(bytes.NewReader(buf))
+
+	assert.Equal(t, ErrShortFanout, err)
+	assert.Nil(t, idx)
+}
+
 func TestDecodeIndexUnsupportedVersion(t *testing.T) {
 	buf := make([]byte, 0, 4+4)
 	buf = append(buf, 0xff, 0x74, 0x4f, 0x63)
diff --git a/git/odb/pack/index_v2.go b/git/odb/pack/index_v2.go
new file mode 100644
index 00000000..bae1d1bd
--- /dev/null
+++ b/git/odb/pack/index_v2.go
@@ -0,0 +1,67 @@
+package pack
+
+import (
+	"bytes"
+	"encoding/binary"
+)
+
+const (
+	// V2 is an instance of IndexVersion corresponding to the V2 index file
+	// format.
+	V2 IndexVersion = 2
+)
+
+// v2Search implements the IndexVersion.Search method for V2 packfiles. It
+// returns the entry at position "at" when its name equals "name"; otherwise a
+// nil entry and the comparison result. Read failures are returned as errors.
+func v2Search(idx *Index, name []byte, at int64) (*IndexEntry, int, error) {
+	var sha [ObjectNameWidth]byte
+	if _, err := idx.readAt(sha[:], v2ShaOffset(at)); err != nil {
+		return nil, 0, err
+	}
+	if cmp := bytes.Compare(name, sha[:]); cmp != 0 {
+		return nil, cmp, nil
+	}
+
+	total := int64(idx.Count())
+	var offs [ObjectSmallOffsetWidth]byte
+	if _, err := idx.readAt(offs[:], v2SmallOffsetOffset(at, total)); err != nil {
+		return nil, 0, err
+	}
+
+	loc := uint64(binary.BigEndian.Uint32(offs[:]))
+	if loc&0x80000000 > 0 {
+		// If the most significant bit (MSB) of the small offset is set,
+		// the remaining 31 bits (loc&0x7fffffff) are an index into the
+		// table of 8-byte offsets that follows the small offsets table,
+		// not a position within the index file itself.
+		var wide [ObjectLargeOffsetWidth]byte
+		large := OffsetV2Start + ObjectEntryV2Width*total + ObjectLargeOffsetWidth*int64(loc&0x7fffffff)
+		if _, err := idx.readAt(wide[:], large); err != nil {
+			return nil, 0, err
+		}
+		loc = binary.BigEndian.Uint64(wide[:])
+	}
+	return &IndexEntry{PackOffset: loc}, 0, nil
+}
+
+// v2ShaOffset returns the offset of a SHA1 given at "at" in the V2 index file.
+func v2ShaOffset(at int64) int64 {
+	// Skip the packfile index header and the L1 fanout table.
+	return OffsetV2Start +
+		// Skip until the desired name in the sorted names table.
+		(ObjectNameWidth * at)
+}
+
+// v2SmallOffsetOffset returns the offset of an object's small (4-byte) offset
+// given by "at".
+func v2SmallOffsetOffset(at, total int64) int64 {
+	// Skip the packfile index header and the L1 fanout table.
+	return OffsetV2Start +
+		// Skip the name table.
+		(ObjectNameWidth * total) +
+		// Skip the CRC table.
+		(ObjectCRCWidth * total) +
+		// Skip until the desired index in the small offsets table.
+		(ObjectSmallOffsetWidth * at)
+}
diff --git a/git/odb/pack/index_v2_test.go b/git/odb/pack/index_v2_test.go
new file mode 100644
index 00000000..ccaddad6
--- /dev/null
+++ b/git/odb/pack/index_v2_test.go
@@ -0,0 +1,106 @@
+package pack
+
+import (
+	"bytes"
+	"encoding/binary"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+var (
+	V2IndexHeader = []byte{
+		0xff, 0x74, 0x4f, 0x63,
+		0x00, 0x00, 0x00, 0x02,
+	}
+	V2IndexFanout = make([]uint32, FanoutEntries)
+
+	V2IndexNames = []byte{
+		0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
+		0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
+
+		0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2,
+		0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2,
+
+		0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3,
+		0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3,
+	}
+	V2IndexSmallSha  = V2IndexNames[0:20]
+	V2IndexMediumSha = V2IndexNames[20:40]
+	V2IndexLargeSha  = V2IndexNames[40:60]
+
+	V2IndexCRCs = []byte{
+		0x0, 0x0, 0x0, 0x0,
+		0x1, 0x1, 0x1, 0x1,
+		0x2, 0x2, 0x2, 0x2,
+	}
+
+	V2IndexOffsets = []byte{
+		0x00, 0x00, 0x00, 0x01,
+		0x00, 0x00, 0x00, 0x02,
+		0x80, 0x00, 0x00, 0x00, // MSB set: entry 0 of the 8-byte offsets below.
+
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03,
+	}
+
+	V2Index = &Index{
+		fanout:  V2IndexFanout,
+		version: V2,
+	}
+)
+
+func TestIndexV2SearchExact(t *testing.T) {
+	e, cmp, err := V2.Search(V2Index, V2IndexMediumSha, 1)
+
+	assert.Equal(t, 0, cmp)
+	assert.NoError(t, err)
+	assert.EqualValues(t, 2, e.PackOffset)
+}
+
+func TestIndexV2SearchSmall(t *testing.T) {
+	e, cmp, err := V2.Search(V2Index, V2IndexMediumSha, 0)
+
+	assert.Equal(t, 1, cmp)
+	assert.NoError(t, err)
+	assert.Nil(t, e)
+}
+
+func TestIndexV2SearchBig(t *testing.T) {
+	e, cmp, err := V2.Search(V2Index, V2IndexMediumSha, 2)
+
+	assert.Equal(t, -1, cmp)
+	assert.NoError(t, err)
+	assert.Nil(t, e)
+}
+
+func TestIndexV2SearchExtendedOffset(t *testing.T) {
+	e, cmp, err := V2.Search(V2Index, V2IndexLargeSha, 2)
+
+	assert.Equal(t, 0, cmp)
+	assert.NoError(t, err)
+	assert.EqualValues(t, 3, e.PackOffset)
+}
+
+func init() {
+	V2IndexFanout[1] = 1
+	V2IndexFanout[2] = 2
+	V2IndexFanout[3] = 3
+
+	for i := 3; i < len(V2IndexFanout); i++ {
+		V2IndexFanout[i] = 3
+	}
+
+	fanout := make([]byte, FanoutWidth)
+	for i, n := range V2IndexFanout {
+		binary.BigEndian.PutUint32(fanout[i*FanoutEntryWidth:], n)
+	}
+
+	buf := make([]byte, 0, OffsetV2Start+3*(ObjectEntryV2Width)+ObjectLargeOffsetWidth)
+	buf = append(buf, V2IndexHeader...)
+	buf = append(buf, fanout...)
+	buf = append(buf, V2IndexNames...)
+	buf = append(buf, V2IndexCRCs...)
+	buf = append(buf, V2IndexOffsets...)
+
+	V2Index.f = bytes.NewReader(buf)
+}
diff --git a/git/odb/pack/index_version.go b/git/odb/pack/index_version.go
index 27656740..8dd6b3ad 100644
--- a/git/odb/pack/index_version.go
+++ b/git/odb/pack/index_version.go
@@ -1,6 +1,7 @@
 package pack
 
 import (
+	"errors"
 	"fmt"
 )
 
@@ -16,5 +17,33 @@ const (
 
 // Width returns the width of the header given in the respective version.
 func (v IndexVersion) Width() int64 {
+	switch v {
+	case V2:
+		return V2Width
+	}
 	panic(fmt.Sprintf("git/odb/pack: width unknown for pack version %d", v))
 }
+
+var (
+	// ErrIndexOutOfBounds is an error returned when the object lookup "at"
+	// (see: Search() below) is out of bounds.
+	ErrIndexOutOfBounds = errors.New("git/odb/pack: index is out of bounds")
+)
+
+// Search searches index "idx" for an object given by "name" at location "at".
+//
+// It will return the object if it was found, or a comparison determining
+// whether to search above or below next.
+//
+// Otherwise, it will return an error.
+func (v IndexVersion) Search(idx *Index, name []byte, at int64) (*IndexEntry, int, error) {
+	// "at" is a zero-based position, so only [0, idx.Count()) is in bounds.
+	if at < 0 || at >= int64(idx.Count()) {
+		return nil, 0, ErrIndexOutOfBounds
+	}
+	switch v {
+	case V2:
+		return v2Search(idx, name, at)
+	}
+	return nil, 0, &UnsupportedVersionErr{Got: uint32(v)}
+}
diff --git a/git/odb/pack/index_version_test.go b/git/odb/pack/index_version_test.go
index b41b6f95..1e41476f 100644
--- a/git/odb/pack/index_version_test.go
+++ b/git/odb/pack/index_version_test.go
@@ -6,6 +6,10 @@ import (
 	"github.com/stretchr/testify/assert"
 )
 
+func TestIndexVersionWidthV2(t *testing.T) {
+	assert.EqualValues(t, 8, V2.Width())
+}
+
 func TestIndexVersionWidthPanicsOnUnknownVersion(t *testing.T) {
 	v := IndexVersion(5)