@@ -3,6 +3,7 @@ package packfile
 import (
 	"bytes"
 	"errors"
+	"fmt"
 	"io"

 	"github.com/go-git/go-git/v5/plumbing"
@@ -174,13 +175,25 @@ func (p *Parser) init() error {
 	return nil
 }

+type objectHeaderWriter func(typ plumbing.ObjectType, sz int64) error
+
+type lazyObjectWriter interface {
+	// LazyWriter enables an object to be lazily written.
+	// It returns:
+	// - w: a writer to receive the object's content.
+	// - lwh: a func to write the object header.
+	// - err: any error from the initial writer creation process.
+	//
+	// Note that if the object header is not written BEFORE the writer
+	// is used, this will result in an invalid object.
+	LazyWriter() (w io.WriteCloser, lwh objectHeaderWriter, err error)
+}
+
 func (p *Parser) indexObjects() error {
 	buf := sync.GetBytesBuffer()
 	defer sync.PutBytesBuffer(buf)

 	for i := uint32(0); i < p.count; i++ {
-		buf.Reset()
-
 		oh, err := p.scanner.NextObjectHeader()
 		if err != nil {
 			return err
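For context, here is a rough sketch of what a storage could do to satisfy the lazyObjectWriter contract introduced above. This is hypothetical illustration code, not part of the change: the type name, the in-memory buffer, and the loose-object style header are all assumptions, and a real storer would also implement the usual storer interfaces.

// Hypothetical example only: a minimal type satisfying lazyObjectWriter.
// It buffers in memory purely for illustration and is not a full
// storage implementation.
type exampleLazyStore struct {
	buf bytes.Buffer
}

func (s *exampleLazyStore) LazyWriter() (io.WriteCloser, objectHeaderWriter, error) {
	wh := func(typ plumbing.ObjectType, sz int64) error {
		// The header must be written before any content; here we assume
		// a loose-object style "<type> <size>\x00" header.
		_, err := fmt.Fprintf(&s.buf, "%s %d\x00", typ, sz)
		return err
	}
	return nopCloser{&s.buf}, wh, nil
}

// nopCloser adapts the buffer to io.WriteCloser for the example.
type nopCloser struct{ io.Writer }

func (nopCloser) Close() error { return nil }

Because the parser only type-asserts p.storage against this interface, storages that do not implement it keep working through the existing MemoryObject path.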
@@ -220,21 +233,60 @@ func (p *Parser) indexObjects() error {
 			ota = newBaseObject(oh.Offset, oh.Length, t)
 		}

-		buf.Grow(int(oh.Length))
-		_, crc, err := p.scanner.NextObject(buf)
+		hasher := plumbing.NewHasher(oh.Type, oh.Length)
+		writers := []io.Writer{hasher}
+		var obj *plumbing.MemoryObject
+
+		// Lazy writing is only available for non-delta objects.
+		if p.storage != nil && !delta {
+			// When a storage is set and supports lazy writing,
+			// use that instead of creating a memory object.
+			if low, ok := p.storage.(lazyObjectWriter); ok {
+				ow, lwh, err := low.LazyWriter()
+				if err != nil {
+					return err
+				}
+
+				if err = lwh(oh.Type, oh.Length); err != nil {
+					return err
+				}
+
+				defer ow.Close()
+				writers = append(writers, ow)
+			} else {
+				obj = new(plumbing.MemoryObject)
+				obj.SetSize(oh.Length)
+				obj.SetType(oh.Type)
+
+				writers = append(writers, obj)
+			}
+		}
+		if delta && !p.scanner.IsSeekable {
+			buf.Reset()
+			buf.Grow(int(oh.Length))
+			writers = append(writers, buf)
+		}
+
+		mw := io.MultiWriter(writers...)
+
+		_, crc, err := p.scanner.NextObject(mw)
 		if err != nil {
 			return err
 		}

+		// Non-delta objects need to be added to the storage. This
+		// is only required when lazy writing is not supported.
+		if obj != nil {
+			if _, err := p.storage.SetEncodedObject(obj); err != nil {
+				return err
+			}
+		}
+
 		ota.Crc32 = crc
 		ota.Length = oh.Length

-		data := buf.Bytes()
 		if !delta {
-			sha1, err := getSHA1(ota.Type, data)
-			if err != nil {
-				return err
-			}
+			sha1 := hasher.Sum()

 			// Move children of placeholder parent into actual parent, in case this
 			// was a non-external delta reference.
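The hunk above replaces the buffer-everything approach with a single streaming pass: io.MultiWriter fans the object content out to the hasher, to the storage writer (or MemoryObject fallback), and to buf only for non-seekable deltas. Below is a standalone sketch of that hash-while-writing pattern; the function and parameter names are illustrative and not part of the patch.

// Illustrative helper: stream content once, computing the object hash
// while the bytes are forwarded to their destination, so no full copy
// of the object needs to be held in memory.
func hashWhileStoring(content io.Reader, typ plumbing.ObjectType, size int64, dst io.Writer) (plumbing.Hash, error) {
	hasher := plumbing.NewHasher(typ, size)
	mw := io.MultiWriter(hasher, dst)
	if _, err := io.Copy(mw, content); err != nil {
		return plumbing.ZeroHash, err
	}
	return hasher.Sum(), nil
}

The design choice is that the SHA1 and the stored copy are produced from the same pass over the inflated data, so the full object no longer has to sit in buf.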
@@ -249,20 +301,8 @@ func (p *Parser) indexObjects() error {
 			p.oiByHash[ota.SHA1] = ota
 		}

-		if p.storage != nil && !delta {
-			obj := new(plumbing.MemoryObject)
-			obj.SetSize(oh.Length)
-			obj.SetType(oh.Type)
-			if _, err := obj.Write(data); err != nil {
-				return err
-			}
-
-			if _, err := p.storage.SetEncodedObject(obj); err != nil {
-				return err
-			}
-		}
-
 		if delta && !p.scanner.IsSeekable {
+			data := buf.Bytes()
 			p.deltas[oh.Offset] = make([]byte, len(data))
 			copy(p.deltas[oh.Offset], data)
 		}
@@ -280,23 +320,29 @@ func (p *Parser) resolveDeltas() error {

 	for _, obj := range p.oi {
 		buf.Reset()
+		buf.Grow(int(obj.Length))
 		err := p.get(obj, buf)
 		if err != nil {
 			return err
 		}
-		content := buf.Bytes()

 		if err := p.onInflatedObjectHeader(obj.Type, obj.Length, obj.Offset); err != nil {
 			return err
 		}

-		if err := p.onInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content); err != nil {
+		if err := p.onInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, nil); err != nil {
 			return err
 		}

 		if !obj.IsDelta() && len(obj.Children) > 0 {
+			// Dealing with an io.ReaderAt object means we can
+			// create it once and reuse it across all children.
+			r := bytes.NewReader(buf.Bytes())
 			for _, child := range obj.Children {
+				// Even though we are discarding the output, we still need to read it
+				// so that the scanner can advance to the next object, and the SHA1 can be
+				// calculated.
+				if err := p.resolveObject(io.Discard, child, r); err != nil {
-				if err := p.resolveObject(io.Discard, child, content); err != nil {
 					return err
 				}
 				p.resolveExternalRef(child)
@@ -361,13 +407,13 @@ func (p *Parser) get(o *objectInfo, buf *bytes.Buffer) (err error) {
 	if o.DiskType.IsDelta() {
 		b := sync.GetBytesBuffer()
 		defer sync.PutBytesBuffer(b)
+		buf.Grow(int(o.Length))
 		err := p.get(o.Parent, b)
 		if err != nil {
 			return err
 		}
-		base := b.Bytes()

-		err = p.resolveObject(buf, o, base)
+		err = p.resolveObject(buf, o, bytes.NewReader(b.Bytes()))
 		if err != nil {
 			return err
 		}
@@ -378,6 +424,13 @@ func (p *Parser) get(o *objectInfo, buf *bytes.Buffer) (err error) {
 		}
 	}

+	// If the scanner is seekable, caching this data into
+	// memory by offset seems wasteful.
+	// There is a trade-off to be considered here in terms
+	// of execution time vs memory consumption.
+	//
+	// TODO: improve seekable execution time, so that we can
+	// skip this cache.
 	if len(o.Children) > 0 {
 		data := make([]byte, buf.Len())
 		copy(data, buf.Bytes())
@@ -386,10 +439,25 @@ func (p *Parser) get(o *objectInfo, buf *bytes.Buffer) (err error) {
 	return nil
 }

+// resolveObject resolves an object from base, using information
+// provided by o.
+//
+// This call has the side-effect of changing field values
+// from the object info o:
+// - Type: OFSDeltaObject may become the target type (e.g. Blob).
+// - Size: The size may be updated with the target size.
+// - Hash: Zero hashes will be calculated as part of the object
+//   resolution. Hence why this process can't be avoided even when w
+//   is io.Discard.
+//
+// base must be an io.ReaderAt, which is a requirement from
+// patchDeltaStream. The main reason is that resolving a delta
+// object may require going back and forth within base, which
+// is not supported by io.Reader.
 func (p *Parser) resolveObject(
 	w io.Writer,
 	o *objectInfo,
-	base []byte,
+	base io.ReaderAt,
 ) error {
 	if !o.DiskType.IsDelta() {
 		return nil
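base changes from []byte to io.ReaderAt because applying a delta needs random access to the base: copy instructions reference arbitrary offsets rather than a single forward pass. The toy illustration below shows only that access pattern; it is not go-git's delta format, and the function name is hypothetical.

// Illustrative only: a delta "copy" instruction needs to read an
// arbitrary slice of the base, which io.ReaderAt supports directly.
func copyFromBase(base io.ReaderAt, offset int64, size int, dst io.Writer) error {
	chunk := make([]byte, size)
	if _, err := base.ReadAt(chunk, offset); err != nil {
		return err
	}
	_, err := dst.Write(chunk)
	return err
}

A bytes.Reader over the parent's content satisfies io.ReaderAt without copying, which is what resolveDeltas and get now pass in.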
@@ -400,26 +468,46 @@ func (p *Parser) resolveObject(
 	if err != nil {
 		return err
 	}
-	data := buf.Bytes()

-	data, err = applyPatchBase(o, data, base)
+	writers := []io.Writer{w}
+	var obj *plumbing.MemoryObject
+	var lwh objectHeaderWriter
+
+	if p.storage != nil {
+		if low, ok := p.storage.(lazyObjectWriter); ok {
+			ow, wh, err := low.LazyWriter()
+			if err != nil {
+				return err
+			}
+			lwh = wh
+
+			defer ow.Close()
+			writers = append(writers, ow)
+		} else {
+			obj = new(plumbing.MemoryObject)
+			ow, err := obj.Writer()
+			if err != nil {
+				return err
+			}
+
+			writers = append(writers, ow)
+		}
+	}
+
+	mw := io.MultiWriter(writers...)
+
+	err = applyPatchBase(o, base, buf, mw, lwh)
 	if err != nil {
 		return err
 	}

-	if p.storage != nil {
-		obj := new(plumbing.MemoryObject)
-		obj.SetSize(o.Size())
+	if obj != nil {
 		obj.SetType(o.Type)
-		if _, err := obj.Write(data); err != nil {
-			return err
-		}
-
+		obj.SetSize(o.Size()) // Size here is correct as it was populated by applyPatchBase.
 		if _, err := p.storage.SetEncodedObject(obj); err != nil {
 			return err
 		}
 	}
-	_, err = w.Write(data)
 	return err
 }

@@ -443,24 +531,31 @@ func (p *Parser) readData(w io.Writer, o *objectInfo) error {
 	return nil
 }

-func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) {
-	patched, err := PatchDelta(base, data)
-	if err != nil {
-		return nil, err
+// applyPatchBase applies the patch to target.
+//
+// Note that ota will be updated as described in resolveObject.
+func applyPatchBase(ota *objectInfo, base io.ReaderAt, delta io.Reader, target io.Writer, wh objectHeaderWriter) error {
+	if target == nil {
+		return fmt.Errorf("cannot apply patch against nil target")
 	}

+	typ := ota.Type
 	if ota.SHA1 == plumbing.ZeroHash {
-		ota.Type = ota.Parent.Type
-		sha1, err := getSHA1(ota.Type, patched)
-		if err != nil {
-			return nil, err
-		}
+		typ = ota.Parent.Type
+	}
+
+	sz, h, err := patchDeltaWriter(target, base, delta, typ, wh)
+	if err != nil {
+		return err
+	}

-		ota.SHA1 = sha1
-		ota.Length = int64(len(patched))
+	if ota.SHA1 == plumbing.ZeroHash {
+		ota.Type = typ
+		ota.Length = int64(sz)
+		ota.SHA1 = h
 	}

-	return patched, nil
+	return nil
 }

 func getSHA1(t plumbing.ObjectType, data []byte) (plumbing.Hash, error) {