100
100
RevisionNotPresent,
101
101
RevisionAlreadyPresent,
103
from bzrlib.tuned_gzip import GzipFile
103
from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip
104
104
from bzrlib.osutils import (
105
105
contains_whitespace,
106
106
contains_linebreaks,
109
110
from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed
253
254
def parse_line_delta_iter(self, lines):
254
255
return iter(self.parse_line_delta(lines))
256
def parse_line_delta(self, lines, version_id):
257
def parse_line_delta(self, lines, version_id, plain=False):
257
258
"""Convert a line based delta into internal representation.
259
260
line delta is in the form of:
262
263
revid(utf8) newline\n
263
264
internal representation is
264
265
(start, end, count, [1..count tuples (revid, newline)])
267
:param plain: If True, the lines are returned as a plain
268
list, not as a list of tuples, i.e.
269
(start, end, count, [1..count newline])
267
272
lines = iter(lines)
273
278
return cache.setdefault(origin, origin), text
275
280
# walk through the lines parsing.
277
start, end, count = [int(n) for n in header.split(',')]
278
contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
279
result.append((start, end, count, contents))
281
# Note that the plain test is explicitly pulled out of the
282
# loop to minimise any performance impact
285
start, end, count = [int(n) for n in header.split(',')]
286
contents = [next().split(' ', 1)[1] for i in xrange(count)]
287
result.append((start, end, count, contents))
290
start, end, count = [int(n) for n in header.split(',')]
291
contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
292
result.append((start, end, count, contents))
282
295
def get_fulltext_content(self, lines):
820
833
"""See VersionedFile.add_lines_with_ghosts()."""
821
834
self._check_add(version_id, lines, random_id, check_content)
822
835
return self._add(version_id, lines, parents, self.delta,
823
parent_texts, None, nostore_sha)
836
parent_texts, None, nostore_sha, random_id)
825
838
def _add_lines(self, version_id, parents, lines, parent_texts,
826
839
left_matching_blocks, nostore_sha, random_id, check_content):
828
841
self._check_add(version_id, lines, random_id, check_content)
829
842
self._check_versions_present(parents)
830
843
return self._add(version_id, lines[:], parents, self.delta,
831
parent_texts, left_matching_blocks, nostore_sha)
844
parent_texts, left_matching_blocks, nostore_sha, random_id)
833
846
def _check_add(self, version_id, lines, random_id, check_content):
834
847
"""check that version_id and lines are safe to add."""
846
859
self._check_lines_are_lines(lines)
848
861
def _add(self, version_id, lines, parents, delta, parent_texts,
849
left_matching_blocks, nostore_sha):
862
left_matching_blocks, nostore_sha, random_id):
850
863
"""Add a set of lines on top of version specified by parents.
852
865
If delta is true, compress the text as a line-delta against
855
868
Any versions not present will be converted into ghosts.
857
# 461 0 6546.0390 43.9100 bzrlib.knit:489(_add)
858
# +400 0 889.4890 418.9790 +bzrlib.knit:192(lower_fulltext)
859
# +461 0 1364.8070 108.8030 +bzrlib.knit:996(add_record)
860
# +461 0 193.3940 41.5720 +bzrlib.knit:898(add_version)
861
# +461 0 134.0590 18.3810 +bzrlib.osutils:361(sha_strings)
862
# +461 0 36.3420 15.4540 +bzrlib.knit:146(make)
863
# +1383 0 8.0370 8.0370 +<len>
864
# +61 0 13.5770 7.9190 +bzrlib.knit:199(lower_line_delta)
865
# +61 0 963.3470 7.8740 +bzrlib.knit:427(_get_content)
866
# +61 0 973.9950 5.2950 +bzrlib.knit:136(line_delta)
867
# +61 0 1918.1800 5.2640 +bzrlib.knit:359(_merge_annotations)
870
# first thing, if the content is something we don't need to store, find
872
line_bytes = ''.join(lines)
873
digest = sha_string(line_bytes)
874
if nostore_sha == digest:
875
raise errors.ExistingContent
869
877
present_parents = []
870
878
if parent_texts is None:
879
887
present_parents[0] != parents[0])):
882
digest = sha_strings(lines)
883
if nostore_sha == digest:
884
raise errors.ExistingContent
885
text_length = sum(map(len, lines))
890
text_length = len(line_bytes)
888
893
if lines[-1][-1] != '\n':
909
914
options.append('line-delta')
910
915
store_lines = self.factory.lower_line_delta(delta_hunks)
916
size, bytes = self._data._record_to_data(version_id, digest,
912
919
options.append('fulltext')
920
# get mixed annotation + content and feed it into the
913
922
store_lines = self.factory.lower_fulltext(content)
923
size, bytes = self._data._record_to_data(version_id, digest,
915
access_memo = self._data.add_record(version_id, digest, store_lines)
916
self._index.add_version(version_id, options, access_memo, parents)
926
access_memo = self._data.add_raw_records([size], bytes)[0]
927
self._index.add_versions(
928
((version_id, options, access_memo, parents),),
917
930
return digest, text_length, content
919
932
def check(self, progress_bar=None):
1022
1035
text_map[version_id] = text
1023
1036
return text_map, final_content
1039
def _apply_delta(lines, delta):
1040
"""Apply delta to lines."""
1043
for start, end, count, delta_lines in delta:
1044
lines[offset+start:offset+end] = delta_lines
1045
offset = offset + (start - end) + count
1025
1048
def iter_lines_added_or_present_in_versions(self, version_ids=None,
1027
1050
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
1359
1382
"""Add a version record to the index."""
1360
1383
self.add_versions(((version_id, options, index_memo, parents),))
1362
def add_versions(self, versions):
1385
def add_versions(self, versions, random_id=False):
1363
1386
"""Add multiple versions to the index.
1365
1388
:param versions: a list of tuples:
1366
1389
(version_id, options, pos, size, parents).
1390
:param random_id: If True the ids being added were randomly generated
1391
and no check for existence will be performed.
1369
1394
orig_history = self._history[:]
1699
1724
"""Add a version record to the index."""
1700
1725
return self.add_versions(((version_id, options, access_memo, parents),))
1702
def add_versions(self, versions):
1727
def add_versions(self, versions, random_id=False):
1703
1728
"""Add multiple versions to the index.
1705
1730
This function does not insert data into the Immutable GraphIndex
1710
1735
:param versions: a list of tuples:
1711
1736
(version_id, options, pos, size, parents).
1737
:param random_id: If True the ids being added were randomly generated
1738
and no check for existence will be performed.
1713
1740
if not self._add_callback:
1714
1741
raise errors.ReadOnlyError(self)
1743
1770
"in parentless index.")
1745
1772
keys[key] = (value, node_refs)
1746
present_nodes = self._get_entries(keys)
1747
for (index, key, value, node_refs) in present_nodes:
1748
if (value, node_refs) != keys[key]:
1749
raise KnitCorrupt(self, "inconsistent details in add_versions"
1750
": %s %s" % ((value, node_refs), keys[key]))
1774
present_nodes = self._get_entries(keys)
1775
for (index, key, value, node_refs) in present_nodes:
1776
if (value, node_refs) != keys[key]:
1777
raise KnitCorrupt(self, "inconsistent details in add_versions"
1778
": %s %s" % ((value, node_refs), keys[key]))
1753
1781
if self._parents:
1754
1782
for key, (value, node_refs) in keys.iteritems():
1969
1997
:return: (len, a StringIO instance with the raw data ready to read.)
1972
data_file = GzipFile(None, mode='wb', fileobj=sio,
1973
compresslevel=Z_DEFAULT_COMPRESSION)
1975
assert isinstance(version_id, str)
1976
data_file.writelines(chain(
1999
bytes = (''.join(chain(
1977
2000
["version %s %d %s\n" % (version_id,
1981
["end %s\n" % version_id]))
2004
["end %s\n" % version_id])))
2005
assert bytes.__class__ == str
2006
compressed_bytes = bytes_to_gzip(bytes)
2007
return len(compressed_bytes), compressed_bytes
1988
2009
def add_raw_records(self, sizes, raw_data):
1989
2010
"""Append a prepared record to the data file.
1997
2018
return self._access.add_raw_records(sizes, raw_data)
1999
def add_record(self, version_id, digest, lines):
2000
"""Write new text record to disk.
2002
Returns index data for retrieving it later, as per add_raw_records.
2004
size, sio = self._record_to_data(version_id, digest, lines)
2005
result = self.add_raw_records([size], sio.getvalue())
2007
self._cache[version_id] = sio.getvalue()
2010
2020
def _parse_record_header(self, version_id, raw_data):
2011
2021
"""Parse a record header for consistency.
2173
2183
assert isinstance(self.source, KnitVersionedFile)
2174
2184
assert isinstance(self.target, KnitVersionedFile)
2186
# If the source and target are mismatched w.r.t. annotations vs
2187
# plain, the data needs to be converted accordingly
2188
if self.source.factory.annotated == self.target.factory.annotated:
2190
elif self.source.factory.annotated:
2191
converter = self._anno_to_plain_converter
2193
# We're converting from a plain to an annotated knit. This requires
2194
# building the annotations from scratch. The generic join code
2195
# handles this implicitly so we delegate to it.
2196
return super(InterKnit, self).join(pb, msg, version_ids,
2176
2199
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
2178
2200
if not version_ids:
2232
2254
assert version_id == version_id2, 'logic error, inconsistent results'
2233
2255
count = count + 1
2234
2256
pb.update("Joining knit", count, total)
2235
raw_records.append((version_id, options, parents, len(raw_data)))
2258
size, raw_data = converter(raw_data, version_id, options,
2261
size = len(raw_data)
2262
raw_records.append((version_id, options, parents, size))
2236
2263
raw_datum.append(raw_data)
2237
2264
self.target._add_raw_records(raw_records, ''.join(raw_datum))
2269
def _anno_to_plain_converter(self, raw_data, version_id, options,
2271
"""Convert annotated content to plain content."""
2272
data, digest = self.source._data._parse_record(version_id, raw_data)
2273
if 'fulltext' in options:
2274
content = self.source.factory.parse_fulltext(data, version_id)
2275
lines = self.target.factory.lower_fulltext(content)
2277
delta = self.source.factory.parse_line_delta(data, version_id,
2279
lines = self.target.factory.lower_line_delta(delta)
2280
return self.target._data._record_to_data(version_id, digest, lines)
2243
2283
InterVersionedFile.register_optimiser(InterKnit)