491
491
_full_enough_block_size = 3*1024*1024 # size at which we won't repack
492
492
_full_enough_mixed_block_size = 2*768*1024 # 1.5MB
494
def __init__(self, block, get_max_entries_per_source=None):
494
def __init__(self, block, get_compressor_settings=None):
495
495
self._block = block
496
496
# We need to preserve the ordering
497
497
self._factories = []
498
498
self._last_byte = 0
499
self._get_max = get_max_entries_per_source
500
self._max_entries_per_source = None
499
self._get_settings = get_compressor_settings
500
self._compressor_settings = None
502
def _get_max_entries_per_source(self):
503
if self._max_entries_per_source is not None:
504
return self._max_entries_per_source
506
if self._get_max is not None:
507
max_entries = self._get_max()
508
if max_entries is None:
502
def _get_compressor_settings(self):
503
if self._compressor_settings is not None:
504
return self._compressor_settings
506
if self._get_settings is not None:
507
settings = self._get_settings()
509
509
vf = GroupCompressVersionedFiles
510
max_entries = vf._DEFAULT_MAX_ENTRIES_PER_SOURCE
511
self._max_entries_per_source = max_entries
512
return self._max_entries_per_source
510
settings = vf._DEFAULT_COMPRESSOR_SETTINGS
511
self._compressor_settings = settings
512
return self._compressor_settings
514
514
def add_factory(self, key, parents, start, end):
515
515
if not self._factories:
549
549
self._block = new_block
551
551
def _make_group_compressor(self):
552
return GroupCompressor(self._get_max_entries_per_source())
552
return GroupCompressor(self._get_compressor_settings())
554
554
def _rebuild_block(self):
555
555
"""Create a new GroupCompressBlock with only the referenced texts."""
574
574
# TODO: If the content would have expanded, then we would want to
575
575
# handle a case where we need to split the block.
576
576
# Now that we have a user-tweakable option
577
# (max_entries_per_source), it is possible that one person set it
577
# (max_bytes_to_index), it is possible that one person set it
578
578
# to a very low value, causing poor compression.
579
579
delta = time.time() - tstart
580
580
self._block = new_block
934
934
class PythonGroupCompressor(_CommonGroupCompressor):
936
def __init__(self, max_entries_per_source=None):
936
def __init__(self, max_bytes_to_index=None):
937
937
"""Create a GroupCompressor.
939
939
Used only if the pyrex version is not available.
995
def __init__(self, max_entries_per_source=None):
995
def __init__(self, settings=None):
996
996
super(PyrexGroupCompressor, self).__init__()
997
self._delta_index = DeltaIndex(
998
max_entries_per_source=max_entries_per_source)
998
max_bytes_to_index = \
999
GroupCompressVersionedFiles._DEFAULT_MAX_BYTES_TO_INDEX
1001
(max_bytes_to_index,) = settings
1002
self._delta_index = DeltaIndex(max_bytes_to_index=max_bytes_to_index)
1000
1004
def _compress(self, key, bytes, max_delta_size, soft=False):
1001
1005
"""see _CommonGroupCompressor._compress"""
1097
1101
currently pending batch.
1100
def __init__(self, gcvf, locations, get_max_entries_per_source=None):
1104
def __init__(self, gcvf, locations, get_compressor_settings=None):
1101
1105
self.gcvf = gcvf
1102
1106
self.locations = locations
1188
1192
block = self.batch_memos[read_memo]
1189
1193
self.manager = _LazyGroupContentManager(block,
1190
get_max_entries_per_source=self._get_max_entries_per_source)
1194
get_compressor_settings=self._get_compressor_settings)
1191
1195
self.last_read_memo = read_memo
1192
1196
start, end = index_memo[3:5]
1193
1197
self.manager.add_factory(key, parents, start, end)
1211
1215
# local blocks. Either way, 'improved resolution' is not very helpful,
1212
1216
# versus running out of memory trying to track everything. The default max
1213
1217
# gives 100% sampling of a 1MB file.
1214
_DEFAULT_MAX_ENTRIES_PER_SOURCE = 1024 * 1024 / 16
1218
_DEFAULT_MAX_BYTES_TO_INDEX = 1024 * 1024
1219
_DEFAULT_COMPRESSOR_SETTINGS = (_DEFAULT_MAX_BYTES_TO_INDEX,)
1216
1221
def __init__(self, index, access, delta=True, _unadded_refs=None,
1217
1222
_group_cache=None):
1233
1238
_group_cache = LRUSizeCache(max_size=50*1024*1024)
1234
1239
self._group_cache = _group_cache
1235
1240
self._immediate_fallback_vfs = []
1236
self._max_entries_per_source = None
1241
self._max_bytes_to_index = None
1238
1243
def without_fallbacks(self):
1239
1244
"""Return a clone of this object without any fallbacks configured."""
1614
1619
# - we run out of keys, or
1615
1620
# - the total bytes to retrieve for this batch > BATCH_SIZE
1616
1621
batcher = _BatchingBlockFetcher(self, locations,
1617
get_max_entries_per_source=self._get_max_entries_per_source)
1622
get_compressor_settings=self._get_compressor_settings)
1618
1623
for source, keys in source_keys:
1619
1624
if source is self:
1620
1625
for key in keys:
1666
1671
for _ in self._insert_record_stream(stream, random_id=False):
1669
def _get_max_entries_per_source(self):
1670
if self._max_entries_per_source is None:
1674
def _get_compressor_settings(self):
1675
if self._max_bytes_to_index is None:
1671
1676
# TODO: VersionedFiles don't know about their containing
1672
1677
# repository, so they don't have much of an idea about their
1673
1678
# location. So for now, this is only a global option.
1674
1679
c = config.GlobalConfig()
1675
val = c.get_user_option('bzr.groupcompress.max_entries_per_source')
1680
val = c.get_user_option('bzr.groupcompress.max_bytes_to_index')
1676
1681
if val is not None:
1679
1684
except ValueError, e:
1680
1685
trace.warning('Value for '
1681
'"bzr.groupcompress.max_entries_per_source"'
1686
'"bzr.groupcompress.max_bytes_to_index"'
1682
1687
' %r is not an integer'
1685
1690
if val is None:
1686
val = self._DEFAULT_MAX_ENTRIES_PER_SOURCE
1687
self._max_entries_per_source = val
1688
return self._max_entries_per_source
1691
val = self._DEFAULT_MAX_BYTES_TO_INDEX
1692
self._max_bytes_to_index = val
1693
return (self._max_bytes_to_index,)
1690
1695
def _make_group_compressor(self):
1691
return GroupCompressor(self._get_max_entries_per_source())
1696
return GroupCompressor(self._get_compressor_settings())
1693
1698
def _insert_record_stream(self, stream, random_id=False, nostore_sha=None,
1694
1699
reuse_blocks=True):