Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 48bec23

Browse filesBrowse files
authored
Merge pull request #799 from pjbgf/perf2
plumbing: Optimise memory consumption for filesystem storage
2 parents 8c1e3e2 + 1c361ad commit 48bec23
Copy full SHA for 48bec23

File tree

Expand file treeCollapse file tree

4 files changed

+276
-61
lines changed
Filter options
Expand file treeCollapse file tree

4 files changed

+276
-61
lines changed

‎plumbing/format/packfile/parser.go

Copy file name to clipboardExpand all lines: plumbing/format/packfile/parser.go
+145-50Lines changed: 145 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package packfile
33
import (
44
"bytes"
55
"errors"
6+
"fmt"
67
"io"
78

89
"github.com/go-git/go-git/v5/plumbing"
@@ -174,13 +175,25 @@ func (p *Parser) init() error {
174175
return nil
175176
}
176177

178+
type objectHeaderWriter func(typ plumbing.ObjectType, sz int64) error
179+
180+
type lazyObjectWriter interface {
181+
// LazyWriter enables an object to be lazily written.
182+
// It returns:
183+
// - w: a writer to receive the object's content.
184+
// - lwh: a func to write the object header.
185+
// - err: any error from the initial writer creation process.
186+
//
187+
// Note that if the object header is not written BEFORE the writer
188+
// is used, this will result in an invalid object.
189+
LazyWriter() (w io.WriteCloser, lwh objectHeaderWriter, err error)
190+
}
191+
177192
func (p *Parser) indexObjects() error {
178193
buf := sync.GetBytesBuffer()
179194
defer sync.PutBytesBuffer(buf)
180195

181196
for i := uint32(0); i < p.count; i++ {
182-
buf.Reset()
183-
184197
oh, err := p.scanner.NextObjectHeader()
185198
if err != nil {
186199
return err
@@ -220,21 +233,60 @@ func (p *Parser) indexObjects() error {
220233
ota = newBaseObject(oh.Offset, oh.Length, t)
221234
}
222235

223-
buf.Grow(int(oh.Length))
224-
_, crc, err := p.scanner.NextObject(buf)
236+
hasher := plumbing.NewHasher(oh.Type, oh.Length)
237+
writers := []io.Writer{hasher}
238+
var obj *plumbing.MemoryObject
239+
240+
// Lazy writing is only available for non-delta objects.
241+
if p.storage != nil && !delta {
242+
// When a storage is set and supports lazy writing,
243+
// use that instead of creating a memory object.
244+
if low, ok := p.storage.(lazyObjectWriter); ok {
245+
ow, lwh, err := low.LazyWriter()
246+
if err != nil {
247+
return err
248+
}
249+
250+
if err = lwh(oh.Type, oh.Length); err != nil {
251+
return err
252+
}
253+
254+
defer ow.Close()
255+
writers = append(writers, ow)
256+
} else {
257+
obj = new(plumbing.MemoryObject)
258+
obj.SetSize(oh.Length)
259+
obj.SetType(oh.Type)
260+
261+
writers = append(writers, obj)
262+
}
263+
}
264+
if delta && !p.scanner.IsSeekable {
265+
buf.Reset()
266+
buf.Grow(int(oh.Length))
267+
writers = append(writers, buf)
268+
}
269+
270+
mw := io.MultiWriter(writers...)
271+
272+
_, crc, err := p.scanner.NextObject(mw)
225273
if err != nil {
226274
return err
227275
}
228276

277+
// Non-delta objects need to be added into the storage. This
278+
// is only required when lazy writing is not supported.
279+
if obj != nil {
280+
if _, err := p.storage.SetEncodedObject(obj); err != nil {
281+
return err
282+
}
283+
}
284+
229285
ota.Crc32 = crc
230286
ota.Length = oh.Length
231287

232-
data := buf.Bytes()
233288
if !delta {
234-
sha1, err := getSHA1(ota.Type, data)
235-
if err != nil {
236-
return err
237-
}
289+
sha1 := hasher.Sum()
238290

239291
// Move children of placeholder parent into actual parent, in case this
240292
// was a non-external delta reference.
@@ -249,20 +301,8 @@ func (p *Parser) indexObjects() error {
249301
p.oiByHash[ota.SHA1] = ota
250302
}
251303

252-
if p.storage != nil && !delta {
253-
obj := new(plumbing.MemoryObject)
254-
obj.SetSize(oh.Length)
255-
obj.SetType(oh.Type)
256-
if _, err := obj.Write(data); err != nil {
257-
return err
258-
}
259-
260-
if _, err := p.storage.SetEncodedObject(obj); err != nil {
261-
return err
262-
}
263-
}
264-
265304
if delta && !p.scanner.IsSeekable {
305+
data := buf.Bytes()
266306
p.deltas[oh.Offset] = make([]byte, len(data))
267307
copy(p.deltas[oh.Offset], data)
268308
}
@@ -280,23 +320,29 @@ func (p *Parser) resolveDeltas() error {
280320

281321
for _, obj := range p.oi {
282322
buf.Reset()
323+
buf.Grow(int(obj.Length))
283324
err := p.get(obj, buf)
284325
if err != nil {
285326
return err
286327
}
287-
content := buf.Bytes()
288328

289329
if err := p.onInflatedObjectHeader(obj.Type, obj.Length, obj.Offset); err != nil {
290330
return err
291331
}
292332

293-
if err := p.onInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content); err != nil {
333+
if err := p.onInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, nil); err != nil {
294334
return err
295335
}
296336

297337
if !obj.IsDelta() && len(obj.Children) > 0 {
338+
// Dealing with an io.ReaderAt object means we can
339+
// create it once and reuse it across all children.
340+
r := bytes.NewReader(buf.Bytes())
298341
for _, child := range obj.Children {
299-
if err := p.resolveObject(io.Discard, child, content); err != nil {
342+
// Even though we are discarding the output, we still need to read it
343+
// so that the scanner can advance to the next object, and the SHA1 can be
344+
// calculated.
345+
if err := p.resolveObject(io.Discard, child, r); err != nil {
300346
return err
301347
}
302348
p.resolveExternalRef(child)
@@ -361,13 +407,13 @@ func (p *Parser) get(o *objectInfo, buf *bytes.Buffer) (err error) {
361407
if o.DiskType.IsDelta() {
362408
b := sync.GetBytesBuffer()
363409
defer sync.PutBytesBuffer(b)
410+
buf.Grow(int(o.Length))
364411
err := p.get(o.Parent, b)
365412
if err != nil {
366413
return err
367414
}
368-
base := b.Bytes()
369415

370-
err = p.resolveObject(buf, o, base)
416+
err = p.resolveObject(buf, o, bytes.NewReader(b.Bytes()))
371417
if err != nil {
372418
return err
373419
}
@@ -378,6 +424,13 @@ func (p *Parser) get(o *objectInfo, buf *bytes.Buffer) (err error) {
378424
}
379425
}
380426

427+
// If the scanner is seekable, caching this data into
428+
// memory by offset seems wasteful.
429+
// There is a trade-off to be considered here in terms
430+
// of execution time vs memory consumption.
431+
//
432+
// TODO: improve seekable execution time, so that we can
433+
// skip this cache.
381434
if len(o.Children) > 0 {
382435
data := make([]byte, buf.Len())
383436
copy(data, buf.Bytes())
@@ -386,10 +439,25 @@ func (p *Parser) get(o *objectInfo, buf *bytes.Buffer) (err error) {
386439
return nil
387440
}
388441

442+
// resolveObject resolves an object from base, using information
443+
// provided by o.
444+
//
445+
// This call has the side-effect of changing field values
446+
// from the object info o:
447+
// - Type: OFSDeltaObject may become the target type (e.g. Blob).
448+
// - Size: The size may be updated with the target size.
449+
// - Hash: Zero hashes will be calculated as part of the object
450+
// resolution. Hence why this process can't be avoided even when w
451+
// is an io.Discard.
452+
//
453+
// base must be an io.ReaderAt, which is a requirement from
454+
// patchDeltaStream. The main reason is that reversing a
455+
// delta object may lead to going back and forth within base,
456+
// which is not supported by io.Reader.
389457
func (p *Parser) resolveObject(
390458
w io.Writer,
391459
o *objectInfo,
392-
base []byte,
460+
base io.ReaderAt,
393461
) error {
394462
if !o.DiskType.IsDelta() {
395463
return nil
@@ -400,26 +468,46 @@ func (p *Parser) resolveObject(
400468
if err != nil {
401469
return err
402470
}
403-
data := buf.Bytes()
404471

405-
data, err = applyPatchBase(o, data, base)
472+
writers := []io.Writer{w}
473+
var obj *plumbing.MemoryObject
474+
var lwh objectHeaderWriter
475+
476+
if p.storage != nil {
477+
if low, ok := p.storage.(lazyObjectWriter); ok {
478+
ow, wh, err := low.LazyWriter()
479+
if err != nil {
480+
return err
481+
}
482+
lwh = wh
483+
484+
defer ow.Close()
485+
writers = append(writers, ow)
486+
} else {
487+
obj = new(plumbing.MemoryObject)
488+
ow, err := obj.Writer()
489+
if err != nil {
490+
return err
491+
}
492+
493+
writers = append(writers, ow)
494+
}
495+
}
496+
497+
mw := io.MultiWriter(writers...)
498+
499+
err = applyPatchBase(o, base, buf, mw, lwh)
406500
if err != nil {
407501
return err
408502
}
409503

410-
if p.storage != nil {
411-
obj := new(plumbing.MemoryObject)
412-
obj.SetSize(o.Size())
504+
if obj != nil {
413505
obj.SetType(o.Type)
414-
if _, err := obj.Write(data); err != nil {
415-
return err
416-
}
417-
506+
obj.SetSize(o.Size()) // Size here is correct as it was populated by applyPatchBase.
418507
if _, err := p.storage.SetEncodedObject(obj); err != nil {
419508
return err
420509
}
421510
}
422-
_, err = w.Write(data)
423511
return err
424512
}
425513

@@ -443,24 +531,31 @@ func (p *Parser) readData(w io.Writer, o *objectInfo) error {
443531
return nil
444532
}
445533

446-
func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) {
447-
patched, err := PatchDelta(base, data)
448-
if err != nil {
449-
return nil, err
534+
// applyPatchBase applies the patch to target.
535+
//
536+
// Note that ota will be updated based on the description in resolveObject.
537+
func applyPatchBase(ota *objectInfo, base io.ReaderAt, delta io.Reader, target io.Writer, wh objectHeaderWriter) error {
538+
if target == nil {
539+
return fmt.Errorf("cannot apply patch against nil target")
450540
}
451541

542+
typ := ota.Type
452543
if ota.SHA1 == plumbing.ZeroHash {
453-
ota.Type = ota.Parent.Type
454-
sha1, err := getSHA1(ota.Type, patched)
455-
if err != nil {
456-
return nil, err
457-
}
544+
typ = ota.Parent.Type
545+
}
546+
547+
sz, h, err := patchDeltaWriter(target, base, delta, typ, wh)
548+
if err != nil {
549+
return err
550+
}
458551

459-
ota.SHA1 = sha1
460-
ota.Length = int64(len(patched))
552+
if ota.SHA1 == plumbing.ZeroHash {
553+
ota.Type = typ
554+
ota.Length = int64(sz)
555+
ota.SHA1 = h
461556
}
462557

463-
return patched, nil
558+
return nil
464559
}
465560

466561
func getSHA1(t plumbing.ObjectType, data []byte) (plumbing.Hash, error) {

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.