--- bzrlib/knit.py
+++ bzrlib/knit.py
 from copy import copy
 from cStringIO import StringIO
 from itertools import izip, chain
 import bzrlib.errors as errors
 from bzrlib.errors import FileExists, NoSuchFile, KnitError, \
     InvalidRevisionId, KnitCorrupt, KnitHeaderError, \
     RevisionNotPresent, RevisionAlreadyPresent
+from bzrlib.tuned_gzip import *
 from bzrlib.trace import mutter
 from bzrlib.osutils import contains_whitespace, contains_linebreaks, \
         return KnitContent(lines)

     def parse_line_delta_iter(self, lines):
+        for result_item in self.parse_line_delta(lines):
+            yield result_item
+
+    def parse_line_delta(self, lines, version):
         """Convert a line based delta into internal representation.

         line delta is in the form of:
         intstart intend intcount
         1..count lines:
         revid(utf8) newline
         internal representation is
         (start, end, count, [1..count tuples (revid, newline)])
-            header = lines.pop(0)
-            start, end, c = [int(n) for n in header.split(',')]
+        # walk through the lines parsing.
+            start, end, count = [int(n) for n in header.split(',')]
-                origin, text = lines.pop(0).split(' ', 1)
+                origin, text = next().split(' ', 1)
                 contents.append((origin.decode('utf-8'), text))
-            yield start, end, c, contents

-    def parse_line_delta(self, lines, version):
-        return list(self.parse_line_delta_iter(lines))
+            result.append((start, end, count, contents))
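
For reference, a serialized line delta is a sequence of hunks, each a
'start,end,count' header followed by count annotated lines, and
parse_line_delta maps that onto the tuple form named in the docstring.
A sketch with invented values (not taken from this diff):

    # one hunk as it comes off the wire:
    delta_lines = ['1,2,2\n',
                   'rev-a replacement line one\n',
                   'rev-b replacement line two\n']
    # parse_line_delta(delta_lines, 'rev-b') would then return:
    #   [(1, 2, 2, [(u'rev-a', 'replacement line one\n'),
    #               (u'rev-b', 'replacement line two\n')])]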
     def lower_fulltext(self, content):
         """convert a fulltext content record into a serializable form.

     def lower_line_delta(self, delta):
         """convert a delta into a serializable form.

-        See parse_line_delta_iter which this inverts.
+        See parse_line_delta which this inverts.

         for start, end, c, lines in delta:
         The basis knit will be used to the largest extent possible
         since it is assumed that accesses to it are faster.

+        # 4168 calls in 14912, 2289 internal
+        # 4168 in 9711 to read_records
+        # 52554 in 1250 to get_parents
+        # 170166 in 865 to list.append
         # needed_revisions holds a list of (method, version_id) of
         # versions that need to be fetched to construct the final
         # version of the file.
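
A rough sketch of the planning walk those comments describe (the
accessor and helper names below are assumptions, not code from this
diff):

    # walk each requested version back through its delta chain,
    # recording (method, version_id) until a fulltext component is hit:
    #   cursor = version_id
    #   while True:
    #       method = self._index.get_method(cursor)   # 'fulltext' or 'line-delta'
    #       needed_versions.append((method, cursor))
    #       if method == 'fulltext':
    #           break
    #       cursor = delta_parent_of(cursor)          # hypothetical helper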
     def get_parents(self, version_id):
         """See VersionedFile.get_parents."""
-        self._check_versions_present([version_id])
-        return list(self._index.get_parents(version_id))
+        # 52554 calls in 1264 872 internal down from 3674
+        try:
+            return self._index.get_parents(version_id)
+        except KeyError:
+            raise RevisionNotPresent(version_id, self.filename)

     def get_parents_with_ghosts(self, version_id):
         """See VersionedFile.get_parents_with_ghosts."""
-        self._check_versions_present([version_id])
-        return list(self._index.get_parents_with_ghosts(version_id))
+        try:
+            return self._index.get_parents_with_ghosts(version_id)
+        except KeyError:
+            raise RevisionNotPresent(version_id, self.filename)
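
Both accessors now follow the same optimistic pattern: do the index
lookup directly and translate KeyError into RevisionNotPresent, rather
than paying for a _check_versions_present() pass on every call. Per the
lsprof note above, that took get_parents from roughly 3674 down to 1264
over 52554 calls.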
     def get_ancestry(self, versions):
         """See VersionedFile.get_ancestry."""

         # only want the _history index to reference the 1st index entry
         if version_id not in self._cache:
+            index = len(self._history)
             self._history.append(version_id)
-        self._cache[version_id] = (version_id, options, pos, size, parents)
+        else:
+            index = self._cache[version_id][5]
+        self._cache[version_id] = (version_id,
+                                   options,
+                                   pos,
+                                   size,
+                                   parents,
+                                   index)
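
The cache record grows a sixth field, the version's position in
_history, which is what the constant-time lookup() further down relies
on. Layout sketch, with field names inferred from the old five-tuple:

    # self._cache[version_id] == (version_id, options, pos, size, parents, index)
    # invariant: self._history[index] == version_id, so re-adding a version
    # keeps pointing at its first _history entry.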
     def __init__(self, transport, filename, mode, create=False):
         _KnitComponentFile.__init__(self, transport, filename, mode)

                 # index entry for version_id
                 version_id = rec[0]
                 if version_id not in self._cache:
+                    index = len(self._history)
                     self._history.append(version_id)
+                else:
+                    index = self._cache[version_id][5]
                 self._cache[version_id] = (version_id,
                                            rec[1].split(','),

             # --- self._cache_version
         except NoSuchFile, e:
             if mode != 'w' or not create:
     def lookup(self, version_id):
         assert version_id in self._cache
-        return self._history.index(version_id)
+        return self._cache[version_id][5]

     def _version_list_to_index(self, versions):
         result_list = []
         for version in versions:
             if version in self._cache:
-                result_list.append(str(self._history.index(version)))
+                # -- inlined lookup() --
+                result_list.append(str(self._cache[version][5]))
+                # -- end lookup() --
             else:
                 result_list.append('.' + version.encode('utf-8'))
         return ' '.join(result_list)
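
The resulting encoding, with invented values: versions present in the
cache serialize as their _history position, missing ones as a
dot-prefixed literal:

    # with self._history == ['rev-a', 'rev-b']:
    #   _version_list_to_index(['rev-b', 'ghost-1'])  ->  '1 .ghost-1'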
     def _parse_record(self, version_id, data):
+        # 4168 calls in 2880 217 internal
+        # 4168 calls to _parse_record_header in 2121
+        # 4168 calls to readlines in 330
         df, rec = self._parse_record_header(version_id, data)
-        record_contents = self._read_record_contents(df, lines)
+        record_contents = df.readlines()
+        l = record_contents.pop()
+        assert len(record_contents) == int(rec[2])
         if l.decode('utf-8') != 'end %s\n' % version_id:
             raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'
                               % (l, version_id))
         return record_contents, rec[3]

-    def _read_record_contents(self, df, record_lines):
-        """Read and return n lines from datafile."""
-        r = []
-        for i in range(record_lines):
-            r.append(df.readline())
-        return r
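
The checks in _parse_record imply each data record is a gzipped block
laid out roughly as below (reconstructed from the parser, not quoted
from this diff):

    # version <version_id> <line-count> <digest>   <- parsed into rec
    # <line-count content lines>                   <- record_contents
    # end <version_id>                             <- the popped trailer l
    # rec[2] is the expected line count and rec[3] the digest that
    # _parse_record returns alongside the content lines.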
     def read_records_iter_raw(self, records):
         """Read text records from data file and yield raw data.

         will be read in the given order. Yields (version_id,
         contents, digest).

+        # 60890 calls for 4168 extractions in 5045, 683 internal.
+        # 4168 calls to readv in 1411
+        # 4168 calls to parse_record in 2880
         needed_records = []
         for version_id, pos, size in records:

             self._records[record_id] = (digest, content)
         for version_id, pos, size in records:
-            yield version_id, copy(self._records[version_id][1]), copy(self._records[version_id][0])
+            yield version_id, list(self._records[version_id][1]), self._records[version_id][0]

     def read_records(self, records):
         """Read records into a dictionary."""
 InterVersionedFile.register_optimiser(InterKnit)

-# make GzipFile faster:
-class GzipFile(gzip.GzipFile):
-    """Knit tuned version of GzipFile.
-
-    This is based on the following lsprof stats:
-    python 2.4 stock GzipFile write:
-    58971      0   5644.3090   2721.4730   gzip:193(write)
-    +58971     0   1159.5530   1159.5530   +<built-in method compress>
-    +176913    0    987.0320    987.0320   +<len>
-    +58971     0    423.1450    423.1450   +<zlib.crc32>
-    +58971     0    353.1060    353.1060   +<method 'write' of 'cStringIO.
-    tuned GzipFile write:
-    58971      0   4477.2590   2103.1120   bzrlib.knit:1250(write)
-    +58971     0   1297.7620   1297.7620   +<built-in method compress>
-    +58971     0    406.2160    406.2160   +<zlib.crc32>
-    +58971     0    341.9020    341.9020   +<method 'write' of 'cStringIO.
-    +58971     0    328.2670    328.2670   +<len>
-
-    Yes, it's only 1.6 seconds, but they add up.
-    def write(self, data):
-        if self.mode != gzip.WRITE:
-            import errno
-            raise IOError(errno.EBADF, "write() on read-only GzipFile object")
-        if self.fileobj is None:
-            raise ValueError, "write() on closed GzipFile object"
-        data_len = len(data)
-        self.size = self.size + data_len
-        self.crc = zlib.crc32(data, self.crc)
-        self.fileobj.write(self.compress.compress(data))
-        self.offset += data_len
-
-    def writelines(self, lines):
-        # profiling indicated a significant overhead
-        # from calling write for each line.
-        # this batch call is a lot faster :).
-        # (4 seconds to 1 second for the sample upgrades I was testing).
-        self.write(''.join(lines))
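
The class moves to bzrlib.tuned_gzip (hence the import added at the
top), but the batching idea is unchanged: hand write() one joined
string so compress() runs once per record rather than once per line.
Sketch with hypothetical names:

    # gz = GzipFile(mode='wb', fileobj=sio)
    # gz.writelines(record_lines)   # one ''.join + one compress call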

 class SequenceMatcher(difflib.SequenceMatcher):
     """Knit tuned sequence matcher.