541
543
# TODO: Consider setting self._factories = None after the above loop,
542
544
# as it will break the reference cycle
546
def _trim_block(self, last_byte):
    """Swap self._block for a new block holding only a prefix of its content.

    :param last_byte: Number of leading content bytes to keep; everything
        from this offset onwards is dropped from the new block.
    """
    # The factories are deliberately left alone: the bytes they reference
    # stay at the same offsets, we are only discarding the unreferenced
    # tail of the content.
    source_block = self._block
    trace.mutter('stripping trailing bytes from groupcompress block'
                 ' %d => %d', source_block._content_length, last_byte)
    trimmed_block = GroupCompressBlock()
    # Presumably guarantees that at least last_byte bytes of _content are
    # available before we slice it -- confirm against _ensure_content.
    source_block._ensure_content(last_byte)
    kept_content = source_block._content[:last_byte]
    trimmed_block.set_content(kept_content)
    self._block = trimmed_block
558
def _rebuild_block(self):
    """Create a new GroupCompressBlock with only the referenced texts.

    Each factory in self._factories has its fulltext fed, in order, through
    a fresh GroupCompressor. The factory's sha1, _start and _end are
    rewritten to match the new layout, and self._block is replaced by the
    block the compressor flushes. The elapsed time and the old/new content
    lengths are written to the trace log.
    """
    compressor = GroupCompressor()
    # Start the clock before any compression work so the logged duration
    # covers the whole rebuild.
    tstart = time.time()
    old_length = self._block._content_length
    # Each factory starts where the previous one ended; the first starts
    # at offset 0.
    cur_endpoint = 0
    for factory in self._factories:
        text = factory.get_bytes_as('fulltext')
        # compress() returns a 4-tuple; the trailing type and length
        # values are not needed here.
        found_sha1, end_point, _kind, _length = compressor.compress(
            factory.key, text, factory.sha1)
        # Now update this factory with the new offsets, etc
        factory.sha1 = found_sha1
        factory._start = cur_endpoint
        factory._end = end_point
        cur_endpoint = end_point
    new_block = compressor.flush()
    # TODO: Should we check that new_block really *is* smaller than the old
    #       block? It seems hard to come up with a method that it would
    #       expand, since we do full compression again. Perhaps based on a
    #       request that ends up poorly ordered?
    delta = time.time() - tstart
    self._block = new_block
    trace.mutter('creating new compressed block on-the-fly in %.3fs'
                 ' %d bytes => %d bytes', delta, old_length,
                 self._block._content_length)
584
def _check_rebuild_block(self):
    """Check to see if our block should be repacked.

    Sums the byte ranges (_end - _start) of every factory and finds the
    highest _end in use, then takes exactly one of three actions:

    * do nothing, when the factories use at least half of the block's
      content length;
    * call self._trim_block(last_byte_used), when the used bytes make up
      more than half of the content up to the last referenced byte;
    * call self._rebuild_block() otherwise.
    """
    total_bytes_used = 0
    last_byte_used = 0
    for factory in self._factories:
        total_bytes_used += factory._end - factory._start
        last_byte_used = max(last_byte_used, factory._end)
    # If we are using most of the bytes from the block, we have nothing
    # else to check (currently more than 1/2)
    if total_bytes_used * 2 >= self._block._content_length:
        return
    # Can we just strip off the trailing bytes? If we are going to be
    # transmitting more than 50% of the front of the content, go ahead
    if total_bytes_used * 2 > last_byte_used:
        self._trim_block(last_byte_used)
        return

    # We are using a small amount of the data, and it isn't just packed
    # nicely at the front, so rebuild the content.
    # Note: This would be *nicer* as a strip-data-from-group, rather than
    #       building it up again from scratch
    #       It might be reasonable to consider the fulltext sizes for
    #       different bits when deciding this, too. As you may have a small
    #       fulltext, and a trivial delta, and you are just trading around
    #       for another fulltext. If we do a simple 'prune' you may end up
    #       expanding many deltas into fulltexts, as well.
    #       If we build a cheap enough 'strip', then we could try a strip,
    #       if that expands the content, we then rebuild.
    self._rebuild_block()
544
614
def _wire_bytes(self):
545
615
"""Return a byte stream suitable for transmitting over the wire."""
546
616
# TODO: this might be a really good time to determine that we want to