                  % (num_bytes, self._content_length))
         # Expand the content if required
         if self._content is None:
+            if self._content_chunks is not None:
+                self._content = ''.join(self._content_chunks)
+                self._content_chunks = None
+        if self._content is None:
             if self._z_content is None:
                 raise AssertionError('No content to decompress')
             if self._z_content == '':

         bytes = apply_delta_to_source(self._content, content_start, end)
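The added branch lets a block carry its text as a list of chunks and defer the ''.join until something (such as apply_delta_to_source above) actually needs the flat string. A minimal standalone sketch of that expand-on-demand pattern (hypothetical class, not bzrlib API):

    class LazyContent(object):
        # Keep the chunk list until a caller needs the joined string,
        # then join exactly once and drop the chunks.
        def __init__(self, chunks):
            self._content = None
            self._content_chunks = chunks

        def content(self):
            if self._content is None and self._content_chunks is not None:
                self._content = ''.join(self._content_chunks)
                self._content_chunks = None
            return self._content

    lazy = LazyContent(['a\n', 'b\n'])
    # No join has happened yet; the first access pays for it once.
    assert lazy.content() == 'a\nb\n'
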
+    def set_chunked_content(self, content_chunks, length):
+        """Set the content of this block to the given chunks."""
+        # If we have lots of short lines, it may be more efficient to join
+        # the content ahead of time. If the content is <10MiB, we don't really
+        # care about the extra memory consumption, so we can just pack it and
+        # be done. However, timing showed 18s => 17.9s for repacking 1k revs of
+        # mysql, which is below the noise margin.
+        self._content_length = length
+        self._content_chunks = content_chunks
+        self._content = None
+        self._z_content = None
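Setting chunked content invalidates both the flat string and the compressed form, so the three cached representations cannot go stale relative to each other. A toy mirror of that invariant (hypothetical standalone class, not the real GroupCompressBlock):

    class ToyBlock(object):
        # Hypothetical mirror of GroupCompressBlock's three content fields.
        def __init__(self):
            self._content = None
            self._content_chunks = None
            self._z_content = None

        def set_chunked_content(self, content_chunks, length):
            self._content_length = length
            self._content_chunks = content_chunks
            self._content = None    # a stale flat copy must not survive
            self._z_content = None  # nor a stale compressed copy

    block = ToyBlock()
    block._content = 'old text'
    block.set_chunked_content(['new', ' text'], 8)
    assert block._content is None and block._z_content is None
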
     def set_content(self, content):
         """Set the content of this block."""
         self._content_length = len(content)
         self._content = content
         self._z_content = None

+    def _create_z_content_using_lzma(self):
+        if self._content_chunks is not None:
+            self._content = ''.join(self._content_chunks)
+            self._content_chunks = None
+        if self._content is None:
+            raise AssertionError('Nothing to compress')
+        self._z_content = pylzma.compress(self._content)
+        self._z_content_length = len(self._z_content)
+
+    def _create_z_content_from_chunks(self):
+        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)
+        compressed_chunks = map(compressor.compress, self._content_chunks)
+        compressed_chunks.append(compressor.flush())
+        self._z_content = ''.join(compressed_chunks)
+        self._z_content_length = len(self._z_content)
+
+    def _create_z_content(self):
+        if self._z_content is not None:
+            return
+        if _USE_LZMA:
+            self._create_z_content_using_lzma()
+            return
+        if self._content_chunks is not None:
+            self._create_z_content_from_chunks()
+            return
+        self._z_content = zlib.compress(self._content)
+        self._z_content_length = len(self._z_content)
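_create_z_content_from_chunks leans on zlib's streaming API so the chunks never have to be joined before compression. A self-contained check (Python 2, stdlib only) that streaming chunks through a compressobj round-trips to the same text as one-shot compression of the joined string:

    import zlib

    chunks = ['line one\n', 'line two\n', 'line three\n']

    # Stream each chunk through a compressor object, then flush once.
    compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)
    compressed_chunks = map(compressor.compress, chunks)
    compressed_chunks.append(compressor.flush())
    streamed = ''.join(compressed_chunks)

    # One-shot compression of the joined content for comparison.
    packed = zlib.compress(''.join(chunks))

    # Both decompress to the same text, so either path may fill _z_content.
    assert zlib.decompress(streamed) == zlib.decompress(packed) == ''.join(chunks)
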
     def to_bytes(self):
         """Encode the information into a byte stream."""
-        compress = zlib.compress
-        if _USE_LZMA:
-            compress = pylzma.compress
-        if self._z_content is None:
-            if self._content is None:
-                raise AssertionError('Nothing to compress')
-            self._z_content = compress(self._content)
-            self._z_content_length = len(self._z_content)
+        self._create_z_content()
         if _USE_LZMA:
             header = self.GCB_LZ_HEADER
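The header picks the codec for the reader. Assuming the header constants and length-prefixed layout used elsewhere in this file ('gcb1z\n' for zlib, 'gcb1l\n' for lzma, then the compressed and raw lengths as ASCII decimal lines, then the payload; none of that is shown in this excerpt), a reader for the zlib case might look like:

    import zlib

    def read_gc_block(data):
        # Sketch under the assumed layout: 6-byte header, two ASCII
        # decimal length lines, then the compressed payload.
        header, rest = data[:6], data[6:]
        z_len, rest = rest.split('\n', 1)
        c_len, rest = rest.split('\n', 1)
        if header != 'gcb1z\n':
            raise ValueError('only the zlib header is handled in this sketch')
        content = zlib.decompress(rest[:int(z_len)])
        if len(content) != int(c_len):
            raise ValueError('content length mismatch')
        return content
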
         # for 'commit' down to ~1x the size of the largest file, at a
         # cost of increased complexity within this code. 2x is still <<
         # 3x the size of the largest file, so we are doing ok.
-        content = ''.join(self.chunks)
+        self._block.set_chunked_content(self.chunks, self.endpoint)
         self.chunks = None
         self._delta_index = None
-        self._block.set_content(content)
         return self._block

     def pop_last(self):

         self.endpoint = endpoint

-def make_pack_factory(graph, delta, keylength):
+def make_pack_factory(graph, delta, keylength, inconsistency_fatal=True):
     """Create a factory for creating a pack based groupcompress.

     This is only functional enough to run interface tests; it doesn't try to
     provide a full pack environment.

         writer = pack.ContainerWriter(stream.write)
         index = _GCGraphIndex(graph_index, lambda:True, parents=parents,
-            add_callback=graph_index.add_nodes)
+            add_callback=graph_index.add_nodes,
+            inconsistency_fatal=inconsistency_fatal)
         access = knit._DirectPackAccess({})
         access.set_writer(writer, graph_index, (transport, 'newpack'))
         result = GroupCompressVersionedFiles(index, access, delta)
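A hedged usage sketch of the new knob; the memory transport and add_lines call are standard bzrlib API, but treat the exact setup as illustrative rather than taken from this change:

    from bzrlib import transport as _mod_transport

    # Build a pack-backed GroupCompressVersionedFiles that only warns on
    # inconsistent re-adds instead of raising KnitCorrupt.
    factory = make_pack_factory(graph=True, delta=False, keylength=1,
                                inconsistency_fatal=False)
    t = _mod_transport.get_transport('memory:///')
    vf = factory(t)
    vf.add_lines(('rev-1',), (), ['some content\n'])
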
                                                nostore_sha=nostore_sha))[0]
         return sha1, length, None

+    def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
+        """See VersionedFiles._add_text()."""
+        self._index._check_write_ok()
+        self._check_add(key, None, random_id, check_content=False)
+        if text.__class__ is not str:
+            raise errors.BzrBadParameterUnicode("text")
+        if parents is None:
+            # The caller might pass None if there is no graph data, but kndx
+            # indexes can't directly store that, so we give them
+            # an empty tuple instead.
+            parents = ()
+        # double handling for now. Make it work until then.
+        length = len(text)
+        record = FulltextContentFactory(key, parents, None, text)
+        sha1 = list(self._insert_record_stream([record], random_id=random_id,
+                                               nostore_sha=nostore_sha))[0]
+        return sha1, length, None
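In short, _add_text skips the split-into-lines round trip that add_lines forces on callers that already hold a full text. An illustrative call, assuming the `vf` built in the factory sketch above (keys are 1-tuples because keylength=1):

    # Returns the SHA-1 and length of the stored text, like add_lines.
    sha1, length, _ = vf._add_text(('rev-2',), (('rev-1',),), 'new content\n')
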
     def add_fallback_versioned_files(self, a_versioned_files):
         """Add a source of texts for texts not present in this knit.

     def annotate(self, key):
         """See VersionedFiles.annotate."""
-        graph = _mod_graph.Graph(self)
-        parent_map = self.get_parent_map([key])
-        if key not in parent_map:
-            raise errors.RevisionNotPresent(key, self)
-        if parent_map[key] is not None:
-            parent_map = dict((k, v) for k, v in graph.iter_ancestry([key])
-                              if v is not None)
-            keys = parent_map.keys()
-        else:
-            keys = [key]
-            parent_map = {key:()}
-        # We used Graph(self) to load the parent_map, but now that we have it,
-        # we can just query the parent map directly, so create a KnownGraph
-        heads_provider = _mod_graph.KnownGraph(parent_map)
-        parent_cache = {}
-        reannotate = annotate.reannotate
-        for record in self.get_record_stream(keys, 'topological', True):
-            key = record.key
-            lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
-            parent_lines = [parent_cache[parent] for parent in parent_map[key]]
-            parent_cache[key] = list(
-                reannotate(parent_lines, lines, key, None, heads_provider))
-        return parent_cache[key]
+        ann = annotate.Annotator(self)
+        return ann.annotate_flat(key)
+
+    def get_annotator(self):
+        return annotate.Annotator(self)
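The rewrite delegates annotation to the shared annotate.Annotator, replacing the hand-rolled graph walk above. Illustrative use, again reusing `vf` from the earlier sketches, and assuming annotate returns (origin key, line) pairs per bzrlib's annotate conventions:

    for origin, line in vf.annotate(('rev-2',)):
        # Each line is paired with the key of the revision that
        # introduced it.
        print origin, repr(line)
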
     def check(self, progress_bar=None):
         """See VersionedFiles.check()."""

             'unordered', True)):
             # XXX: todo - optimise to use less than full texts.
             key = record.key
-            pb.update('Walking content', key_idx, total)
+            if pb is not None:
+                pb.update('Walking content', key_idx, total)
             if record.storage_kind == 'absent':
                 raise errors.RevisionNotPresent(key, self)
             lines = osutils.split_lines(record.get_bytes_as('fulltext'))
             for line in lines:
                 yield line, key
-        pb.update('Walking content', total, total)
+        if pb is not None:
+            pb.update('Walking content', total, total)

     def keys(self):
         """See VersionedFiles.keys."""

     """Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""

     def __init__(self, graph_index, is_locked, parents=True,
-        add_callback=None, track_external_parent_refs=False):
+        add_callback=None, track_external_parent_refs=False,
+        inconsistency_fatal=True):
         """Construct a _GCGraphIndex on a graph_index.

         :param graph_index: An implementation of bzrlib.index.GraphIndex.

         :param track_external_parent_refs: As keys are added, keep track of the
             keys they reference, so that we can query get_missing_parents(),
             etc.
+        :param inconsistency_fatal: When asked to add records that are already
+            present, and the details are inconsistent with the existing
+            record, raise an exception instead of warning (and skipping the
+            record).
         """
         self._add_callback = add_callback
         self._graph_index = graph_index
         self._parents = parents
         self.has_graph = parents
         self._is_locked = is_locked
+        self._inconsistency_fatal = inconsistency_fatal
         if track_external_parent_refs:
             self._key_dependencies = knit._KeyRefs()

         present_nodes = self._get_entries(keys)
         for (index, key, value, node_refs) in present_nodes:
             if node_refs != keys[key][1]:
-                raise errors.KnitCorrupt(self, "inconsistent details in add_records"
-                    ": %s %s" % ((value, node_refs), keys[key]))
+                details = '%s %s %s' % (key, (value, node_refs), keys[key])
+                if self._inconsistency_fatal:
+                    raise errors.KnitCorrupt(self, "inconsistent details"
+                                             " in add_records: %s" %
+                                             details)
+                else:
+                    trace.warning("inconsistent details in skipped"
+                                  " record: %s", details)
         if check_present:
             missing_keys = keys.difference(found_keys)
             if missing_keys:
-                raise RevisionNotPresent(missing_keys.pop(), self)
+                raise errors.RevisionNotPresent(missing_keys.pop(), self)

     def get_parent_map(self, keys):
         """Get a map of the parents of keys.