1
# Copyright (C) 2005, 2006 Canonical Ltd
1
# Copyright (C) 2005, 2006 by Canonical Ltd
2
2
# Written by Martin Pool.
3
3
# Modified by Johan Rydberg <jrydberg@gnu.org>
4
4
# Modified by Robert Collins <robert.collins@canonical.com>
77
import bzrlib.errors as errors
82
78
from bzrlib.errors import FileExists, NoSuchFile, KnitError, \
83
79
InvalidRevisionId, KnitCorrupt, KnitHeaderError, \
84
80
RevisionNotPresent, RevisionAlreadyPresent
86
82
from bzrlib.trace import mutter
87
83
from bzrlib.osutils import contains_whitespace, contains_linebreaks, \
85
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
89
86
from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed
90
87
from bzrlib.tsort import topo_sort
91
88
import bzrlib.weave
92
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
93
import bzrlib.patiencediff
96
91
# TODO: Split out code specific to this format into an associated object.
167
162
internal representation is of the format:
168
163
(revid, plaintext)
170
decode_utf8 = cache_utf8.decode
172
166
for line in content:
173
167
origin, text = line.split(' ', 1)
174
lines.append((decode_utf8(origin), text))
168
lines.append((origin.decode('utf-8'), text))
175
169
return KnitContent(lines)
177
171
def parse_line_delta_iter(self, lines):
201
194
origin, text = next().split(' ', 1)
203
contents.append((decode_utf8(origin), text))
196
contents.append((origin.decode('utf-8'), text))
204
197
result.append((start, end, count, contents))
210
203
see parse_fulltext which this inverts.
212
encode_utf8 = cache_utf8.encode
213
return ['%s %s' % (encode_utf8(o), t) for o, t in content._lines]
205
return ['%s %s' % (o.encode('utf-8'), t) for o, t in content._lines]
215
207
def lower_line_delta(self, delta):
216
208
"""convert a delta into a serializable form.
218
210
See parse_line_delta which this inverts.
220
encode_utf8 = cache_utf8.encode
222
213
for start, end, c, lines in delta:
223
214
out.append('%d,%d,%d\n' % (start, end, c))
224
out.extend(encode_utf8(origin) + ' ' + text
225
for origin, text in lines)
215
for origin, text in lines:
216
out.append('%s %s' % (origin.encode('utf-8'), text))
281
272
stored and retrieved.
284
def __init__(self, relpath, transport, file_mode=None, access_mode=None,
275
def __init__(self, relpath, transport, file_mode=None, access_mode=None,
285
276
factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
286
create=False, create_parent_dir=False, delay_create=False,
288
278
"""Construct a knit at location specified by relpath.
290
280
:param create: If not True, only open an existing knit.
291
:param create_parent_dir: If True, create the parent directory if
292
creating the file fails. (This is used for stores with
293
hash-prefixes that may not exist yet)
294
:param delay_create: The calling code is aware that the knit won't
295
actually be created until the first data is stored.
297
282
if deprecated_passed(basis_knit):
298
283
warnings.warn("KnitVersionedFile.__(): The basis_knit parameter is"
309
294
self.delta = delta
311
296
self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
312
access_mode, create=create, file_mode=file_mode,
313
create_parent_dir=create_parent_dir, delay_create=delay_create,
297
access_mode, create=create, file_mode=file_mode)
315
298
self._data = _KnitData(transport, relpath + DATA_SUFFIX,
316
access_mode, create=create and not len(self), file_mode=file_mode,
317
create_parent_dir=create_parent_dir, delay_create=delay_create,
299
access_mode, create=create and not len(self), file_mode=file_mode)
320
301
def __repr__(self):
321
302
return '%s(%s)' % (self.__class__.__name__,
415
396
"""See VersionedFile.copy_to()."""
416
397
# copy the current index to a temp index to avoid racing with local
418
transport.put_file_non_atomic(name + INDEX_SUFFIX + '.tmp',
419
self.transport.get(self._index._filename))
399
transport.put(name + INDEX_SUFFIX + '.tmp', self.transport.get(self._index._filename),)
420
400
# copy the data file
421
401
f = self._data._open_file()
423
transport.put_file(name + DATA_SUFFIX, f)
403
transport.put(name + DATA_SUFFIX, f)
426
406
# move the copied index into place
427
407
transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)
429
409
def create_empty(self, name, transport, mode=None):
430
return KnitVersionedFile(name, transport, factory=self.factory,
431
delta=self.delta, create=True)
410
return KnitVersionedFile(name, transport, factory=self.factory, delta=self.delta, create=True)
433
412
def _fix_parents(self, version, new_parents):
434
413
"""Fix the parents list for version.
524
503
for parent_id in parents:
525
504
merge_content = self._get_content(parent_id, parent_texts)
526
seq = bzrlib.patiencediff.PatienceSequenceMatcher(
527
None, merge_content.text(), content.text())
505
seq = KnitSequenceMatcher(None, merge_content.text(), content.text())
528
506
if delta_seq is None:
529
507
# setup a delta seq to reuse.
541
519
reference_content = self._get_content(parents[0], parent_texts)
542
520
new_texts = content.text()
543
521
old_texts = reference_content.text()
544
delta_seq = bzrlib.patiencediff.PatienceSequenceMatcher(
545
None, old_texts, new_texts)
522
delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
546
523
return self._make_line_delta(delta_seq, content)
548
525
def _make_line_delta(self, delta_seq, new_content):
802
779
text_map[version_id] = text
803
780
return text_map, final_content
805
def iter_lines_added_or_present_in_versions(self, version_ids=None,
782
def iter_lines_added_or_present_in_versions(self, version_ids=None):
807
783
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
808
784
if version_ids is None:
809
785
version_ids = self.versions()
811
pb = progress.DummyProgress()
812
786
# we don't care about inclusions, the caller cares.
813
787
# but we need to setup a list of records to visit.
814
788
# we need version_id, position, length
826
800
data_pos, length = self._index.get_position(version_id)
827
801
version_id_records.append((version_id, data_pos, length))
803
pb = bzrlib.ui.ui_factory.nested_progress_bar()
830
805
total = len(version_id_records)
831
pb.update('Walking content.', count, total)
832
for version_id, data, sha_value in \
833
self._data.read_records_iter(version_id_records):
834
807
pb.update('Walking content.', count, total)
835
method = self._index.get_method(version_id)
836
version_idx = self._index.lookup(version_id)
837
assert method in ('fulltext', 'line-delta')
838
if method == 'fulltext':
839
content = self.factory.parse_fulltext(data, version_idx)
840
for line in content.text():
843
delta = self.factory.parse_line_delta(data, version_idx)
844
for start, end, count, lines in delta:
845
for origin, line in lines:
808
for version_id, data, sha_value in \
809
self._data.read_records_iter(version_id_records):
810
pb.update('Walking content.', count, total)
811
method = self._index.get_method(version_id)
812
version_idx = self._index.lookup(version_id)
813
assert method in ('fulltext', 'line-delta')
814
if method == 'fulltext':
815
content = self.factory.parse_fulltext(data, version_idx)
816
for line in content.text():
848
pb.update('Walking content.', total, total)
819
delta = self.factory.parse_line_delta(data, version_idx)
820
for start, end, count, lines in delta:
821
for origin, line in lines:
824
pb.update('Walking content.', total, total)
827
pb.update('Walking content.', total, total)
850
831
def num_versions(self):
851
832
"""See VersionedFile.num_versions()."""
958
939
class _KnitComponentFile(object):
959
940
"""One of the files used to implement a knit database"""
961
def __init__(self, transport, filename, mode, file_mode=None,
962
create_parent_dir=False, dir_mode=None):
942
def __init__(self, transport, filename, mode, file_mode=None):
963
943
self._transport = transport
964
944
self._filename = filename
965
945
self._mode = mode
966
self._file_mode = file_mode
967
self._dir_mode = dir_mode
968
self._create_parent_dir = create_parent_dir
969
self._need_to_create = False
946
self._file_mode=file_mode
948
def write_header(self):
949
if self._transport.append(self._filename, StringIO(self.HEADER),
950
mode=self._file_mode):
951
raise KnitCorrupt(self._filename, 'misaligned after writing header')
971
953
def check_header(self, fp):
972
954
line = fp.readline()
1062
def __init__(self, transport, filename, mode, create=False, file_mode=None,
1063
create_parent_dir=False, delay_create=False, dir_mode=None):
1064
_KnitComponentFile.__init__(self, transport, filename, mode,
1065
file_mode=file_mode,
1066
create_parent_dir=create_parent_dir,
1044
def __init__(self, transport, filename, mode, create=False, file_mode=None):
1045
_KnitComponentFile.__init__(self, transport, filename, mode, file_mode)
1068
1046
self._cache = {}
1069
1047
# position in _history is the 'official' index for a revision
1070
1048
# but the values may have come from a newer entry.
1139
1117
except NoSuchFile, e:
1140
1118
if mode != 'w' or not create:
1143
self._need_to_create = True
1145
self._transport.put_bytes_non_atomic(self._filename,
1146
self.HEADER, mode=self._file_mode)
1149
1122
pb.update('read knit index', total, total)
1241
1213
result_list.append(str(self._cache[version][5]))
1242
1214
# -- end lookup () --
1244
result_list.append('.' + encode_utf8(version))
1216
result_list.append('.' + version.encode('utf-8'))
1245
1217
return ' '.join(result_list)
1247
1219
def add_version(self, version_id, options, pos, size, parents):
1255
1227
(version_id, options, pos, size, parents).
1258
encode_utf8 = cache_utf8.encode
1259
1230
for version_id, options, pos, size, parents in versions:
1260
line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),
1231
line = "\n%s %s %s %s %s :" % (version_id.encode('utf-8'),
1261
1232
','.join(options),
1265
1236
assert isinstance(line, str), \
1266
1237
'content must be utf-8 encoded: %r' % (line,)
1267
1238
lines.append(line)
1268
if not self._need_to_create:
1269
self._transport.append_bytes(self._filename, ''.join(lines))
1272
sio.write(self.HEADER)
1273
sio.writelines(lines)
1275
self._transport.put_file_non_atomic(self._filename, sio,
1276
create_parent_dir=self._create_parent_dir,
1277
mode=self._file_mode,
1278
dir_mode=self._dir_mode)
1279
self._need_to_create = False
1239
self._transport.append(self._filename, StringIO(''.join(lines)))
1281
1240
# cache after writing, so that a failed write leads to missing cache
1282
1241
# entries not extra ones. XXX TODO: RBC 20060502 in the event of a
1283
1242
# failure, reload the index or flush it or some such, to prevent
1288
1247
def has_version(self, version_id):
1289
1248
"""True if the version is in the index."""
1290
return (version_id in self._cache)
1249
return self._cache.has_key(version_id)
1292
1251
def get_position(self, version_id):
1293
1252
"""Return data position and size of specified version."""
1328
1287
class _KnitData(_KnitComponentFile):
1329
1288
"""Contents of the knit data file"""
1331
def __init__(self, transport, filename, mode, create=False, file_mode=None,
1332
create_parent_dir=False, delay_create=False,
1334
_KnitComponentFile.__init__(self, transport, filename, mode,
1335
file_mode=file_mode,
1336
create_parent_dir=create_parent_dir,
1290
HEADER = "# bzr knit data 8\n"
1292
def __init__(self, transport, filename, mode, create=False, file_mode=None):
1293
_KnitComponentFile.__init__(self, transport, filename, mode)
1338
1294
self._checked = False
1339
1295
# TODO: jam 20060713 conceptually, this could spill to disk
1340
1296
# if the cached size gets larger than a certain amount
1343
1299
self._cache = {}
1344
1300
self._do_cache = False
1347
self._need_to_create = create
1349
self._transport.put_bytes_non_atomic(self._filename, '',
1350
mode=self._file_mode)
1302
self._transport.put(self._filename, StringIO(''), mode=file_mode)
1352
1304
def enable_cache(self):
1353
1305
"""Enable caching of reads."""
1373
1325
sio = StringIO()
1374
1326
data_file = GzipFile(None, mode='wb', fileobj=sio)
1376
version_id_utf8 = cache_utf8.encode(version_id)
1377
1327
data_file.writelines(chain(
1378
["version %s %d %s\n" % (version_id_utf8,
1328
["version %s %d %s\n" % (version_id.encode('utf-8'),
1382
["end %s\n" % version_id_utf8]))
1332
["end %s\n" % version_id.encode('utf-8')]))
1383
1333
data_file.close()
1384
1334
length= sio.tell()
1392
1342
:return: the offset in the data file raw_data was written.
1394
1344
assert isinstance(raw_data, str), 'data must be plain bytes'
1395
if not self._need_to_create:
1396
return self._transport.append_bytes(self._filename, raw_data)
1398
self._transport.put_bytes_non_atomic(self._filename, raw_data,
1399
create_parent_dir=self._create_parent_dir,
1400
mode=self._file_mode,
1401
dir_mode=self._dir_mode)
1402
self._need_to_create = False
1345
return self._transport.append(self._filename, StringIO(raw_data))
1405
1347
def add_record(self, version_id, digest, lines):
1406
1348
"""Write new text record to disk. Returns the position in the
1407
1349
file where it was written."""
1408
1350
size, sio = self._record_to_data(version_id, digest, lines)
1409
1351
# write to disk
1410
if not self._need_to_create:
1411
start_pos = self._transport.append_file(self._filename, sio)
1413
self._transport.put_file_non_atomic(self._filename, sio,
1414
create_parent_dir=self._create_parent_dir,
1415
mode=self._file_mode,
1416
dir_mode=self._dir_mode)
1417
self._need_to_create = False
1352
start_pos = self._transport.append(self._filename, sio)
1419
1353
if self._do_cache:
1420
1354
self._cache[version_id] = sio.getvalue()
1421
1355
return start_pos, size
1430
1364
rec = df.readline().split()
1431
1365
if len(rec) != 4:
1432
1366
raise KnitCorrupt(self._filename, 'unexpected number of elements in record header')
1433
if cache_utf8.decode(rec[1]) != version_id:
1367
if rec[1].decode('utf-8')!= version_id:
1434
1368
raise KnitCorrupt(self._filename,
1435
1369
'unexpected version, wanted %r, got %r' % (
1436
1370
version_id, rec[1]))
1445
1379
record_contents = df.readlines()
1446
1380
l = record_contents.pop()
1447
1381
assert len(record_contents) == int(rec[2])
1448
if l != 'end %s\n' % cache_utf8.encode(version_id):
1382
if l.decode('utf-8') != 'end %s\n' % version_id:
1449
1383
raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'
1450
1384
% (l, version_id))