77
import bzrlib.errors as errors
82
78
from bzrlib.errors import FileExists, NoSuchFile, KnitError, \
83
79
InvalidRevisionId, KnitCorrupt, KnitHeaderError, \
84
80
RevisionNotPresent, RevisionAlreadyPresent
86
82
from bzrlib.trace import mutter
87
83
from bzrlib.osutils import contains_whitespace, contains_linebreaks, \
85
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
89
86
from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed
90
87
from bzrlib.tsort import topo_sort
91
88
import bzrlib.weave
92
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
95
91
# TODO: Split out code specific to this format into an associated object.
166
162
internal representation is of the format:
167
163
(revid, plaintext)
169
decode_utf8 = cache_utf8.decode
171
166
for line in content:
172
167
origin, text = line.split(' ', 1)
173
lines.append((decode_utf8(origin), text))
168
lines.append((origin.decode('utf-8'), text))
174
169
return KnitContent(lines)
176
171
def parse_line_delta_iter(self, lines):
200
194
origin, text = next().split(' ', 1)
202
contents.append((decode_utf8(origin), text))
196
contents.append((origin.decode('utf-8'), text))
203
197
result.append((start, end, count, contents))
209
203
see parse_fulltext which this inverts.
211
encode_utf8 = cache_utf8.encode
212
return ['%s %s' % (encode_utf8(o), t) for o, t in content._lines]
205
return ['%s %s' % (o.encode('utf-8'), t) for o, t in content._lines]
214
207
def lower_line_delta(self, delta):
215
208
"""convert a delta into a serializable form.
217
210
See parse_line_delta which this inverts.
219
encode_utf8 = cache_utf8.encode
221
213
for start, end, c, lines in delta:
222
214
out.append('%d,%d,%d\n' % (start, end, c))
223
out.extend(encode_utf8(origin) + ' ' + text
224
for origin, text in lines)
215
for origin, text in lines:
216
out.append('%s %s' % (origin.encode('utf-8'), text))
280
272
stored and retrieved.
283
def __init__(self, relpath, transport, file_mode=None, access_mode=None,
275
def __init__(self, relpath, transport, file_mode=None, access_mode=None,
284
276
factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
285
create=False, create_parent_dir=False, delay_create=False,
287
278
"""Construct a knit at location specified by relpath.
289
280
:param create: If not True, only open an existing knit.
290
:param create_parent_dir: If True, create the parent directory if
291
creating the file fails. (This is used for stores with
292
hash-prefixes that may not exist yet)
293
:param delay_create: The calling code is aware that the knit won't
294
actually be created until the first data is stored.
296
282
if deprecated_passed(basis_knit):
297
283
warnings.warn("KnitVersionedFile.__(): The basis_knit parameter is"
308
294
self.delta = delta
310
296
self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
311
access_mode, create=create, file_mode=file_mode,
312
create_parent_dir=create_parent_dir, delay_create=delay_create,
297
access_mode, create=create, file_mode=file_mode)
314
298
self._data = _KnitData(transport, relpath + DATA_SUFFIX,
315
access_mode, create=create and not len(self), file_mode=file_mode,
316
create_parent_dir=create_parent_dir, delay_create=delay_create,
299
access_mode, create=create and not len(self), file_mode=file_mode)
319
301
def __repr__(self):
320
302
return '%s(%s)' % (self.__class__.__name__,
414
396
"""See VersionedFile.copy_to()."""
415
397
# copy the current index to a temp index to avoid racing with local
417
transport.put_file_non_atomic(name + INDEX_SUFFIX + '.tmp',
418
self.transport.get(self._index._filename))
399
transport.put(name + INDEX_SUFFIX + '.tmp', self.transport.get(self._index._filename),)
419
400
# copy the data file
420
401
f = self._data._open_file()
422
transport.put_file(name + DATA_SUFFIX, f)
403
transport.put(name + DATA_SUFFIX, f)
425
406
# move the copied index into place
426
407
transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)
428
409
def create_empty(self, name, transport, mode=None):
429
return KnitVersionedFile(name, transport, factory=self.factory,
430
delta=self.delta, create=True)
410
return KnitVersionedFile(name, transport, factory=self.factory, delta=self.delta, create=True)
432
412
def _fix_parents(self, version, new_parents):
433
413
"""Fix the parents list for version.
799
779
text_map[version_id] = text
800
780
return text_map, final_content
802
def iter_lines_added_or_present_in_versions(self, version_ids=None,
782
def iter_lines_added_or_present_in_versions(self, version_ids=None):
804
783
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
805
784
if version_ids is None:
806
785
version_ids = self.versions()
808
pb = progress.DummyProgress()
809
786
# we don't care about inclusions, the caller cares.
810
787
# but we need to setup a list of records to visit.
811
788
# we need version_id, position, length
823
800
data_pos, length = self._index.get_position(version_id)
824
801
version_id_records.append((version_id, data_pos, length))
803
pb = bzrlib.ui.ui_factory.nested_progress_bar()
827
805
total = len(version_id_records)
828
pb.update('Walking content.', count, total)
829
for version_id, data, sha_value in \
830
self._data.read_records_iter(version_id_records):
831
807
pb.update('Walking content.', count, total)
832
method = self._index.get_method(version_id)
833
version_idx = self._index.lookup(version_id)
834
assert method in ('fulltext', 'line-delta')
835
if method == 'fulltext':
836
content = self.factory.parse_fulltext(data, version_idx)
837
for line in content.text():
840
delta = self.factory.parse_line_delta(data, version_idx)
841
for start, end, count, lines in delta:
842
for origin, line in lines:
808
for version_id, data, sha_value in \
809
self._data.read_records_iter(version_id_records):
810
pb.update('Walking content.', count, total)
811
method = self._index.get_method(version_id)
812
version_idx = self._index.lookup(version_id)
813
assert method in ('fulltext', 'line-delta')
814
if method == 'fulltext':
815
content = self.factory.parse_fulltext(data, version_idx)
816
for line in content.text():
845
pb.update('Walking content.', total, total)
819
delta = self.factory.parse_line_delta(data, version_idx)
820
for start, end, count, lines in delta:
821
for origin, line in lines:
824
pb.update('Walking content.', total, total)
827
pb.update('Walking content.', total, total)
847
831
def num_versions(self):
848
832
"""See VersionedFile.num_versions()."""
955
939
class _KnitComponentFile(object):
956
940
"""One of the files used to implement a knit database"""
958
def __init__(self, transport, filename, mode, file_mode=None,
959
create_parent_dir=False, dir_mode=None):
942
def __init__(self, transport, filename, mode, file_mode=None):
960
943
self._transport = transport
961
944
self._filename = filename
962
945
self._mode = mode
963
self._file_mode = file_mode
964
self._dir_mode = dir_mode
965
self._create_parent_dir = create_parent_dir
966
self._need_to_create = False
946
self._file_mode=file_mode
948
def write_header(self):
949
if self._transport.append(self._filename, StringIO(self.HEADER),
950
mode=self._file_mode):
951
raise KnitCorrupt(self._filename, 'misaligned after writing header')
968
953
def check_header(self, fp):
969
954
line = fp.readline()
1059
def __init__(self, transport, filename, mode, create=False, file_mode=None,
1060
create_parent_dir=False, delay_create=False, dir_mode=None):
1061
_KnitComponentFile.__init__(self, transport, filename, mode,
1062
file_mode=file_mode,
1063
create_parent_dir=create_parent_dir,
1044
def __init__(self, transport, filename, mode, create=False, file_mode=None):
1045
_KnitComponentFile.__init__(self, transport, filename, mode, file_mode)
1065
1046
self._cache = {}
1066
1047
# position in _history is the 'official' index for a revision
1067
1048
# but the values may have come from a newer entry.
1136
1117
except NoSuchFile, e:
1137
1118
if mode != 'w' or not create:
1140
self._need_to_create = True
1142
self._transport.put_bytes_non_atomic(self._filename,
1143
self.HEADER, mode=self._file_mode)
1146
1122
pb.update('read knit index', total, total)
1238
1213
result_list.append(str(self._cache[version][5]))
1239
1214
# -- end lookup () --
1241
result_list.append('.' + encode_utf8(version))
1216
result_list.append('.' + version.encode('utf-8'))
1242
1217
return ' '.join(result_list)
1244
1219
def add_version(self, version_id, options, pos, size, parents):
1252
1227
(version_id, options, pos, size, parents).
1255
encode_utf8 = cache_utf8.encode
1256
1230
for version_id, options, pos, size, parents in versions:
1257
line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),
1231
line = "\n%s %s %s %s %s :" % (version_id.encode('utf-8'),
1258
1232
','.join(options),
1262
1236
assert isinstance(line, str), \
1263
1237
'content must be utf-8 encoded: %r' % (line,)
1264
1238
lines.append(line)
1265
if not self._need_to_create:
1266
self._transport.append_bytes(self._filename, ''.join(lines))
1269
sio.write(self.HEADER)
1270
sio.writelines(lines)
1272
self._transport.put_file_non_atomic(self._filename, sio,
1273
create_parent_dir=self._create_parent_dir,
1274
mode=self._file_mode,
1275
dir_mode=self._dir_mode)
1276
self._need_to_create = False
1239
self._transport.append(self._filename, StringIO(''.join(lines)))
1278
1240
# cache after writing, so that a failed write leads to missing cache
1279
1241
# entries not extra ones. XXX TODO: RBC 20060502 in the event of a
1280
1242
# failure, reload the index or flush it or some such, to prevent
1285
1247
def has_version(self, version_id):
1286
1248
"""True if the version is in the index."""
1287
return (version_id in self._cache)
1249
return self._cache.has_key(version_id)
1289
1251
def get_position(self, version_id):
1290
1252
"""Return data position and size of specified version."""
1325
1287
class _KnitData(_KnitComponentFile):
1326
1288
"""Contents of the knit data file"""
1328
def __init__(self, transport, filename, mode, create=False, file_mode=None,
1329
create_parent_dir=False, delay_create=False,
1331
_KnitComponentFile.__init__(self, transport, filename, mode,
1332
file_mode=file_mode,
1333
create_parent_dir=create_parent_dir,
1290
HEADER = "# bzr knit data 8\n"
1292
def __init__(self, transport, filename, mode, create=False, file_mode=None):
1293
_KnitComponentFile.__init__(self, transport, filename, mode)
1335
1294
self._checked = False
1336
1295
# TODO: jam 20060713 conceptually, this could spill to disk
1337
1296
# if the cached size gets larger than a certain amount
1340
1299
self._cache = {}
1341
1300
self._do_cache = False
1344
self._need_to_create = create
1346
self._transport.put_bytes_non_atomic(self._filename, '',
1347
mode=self._file_mode)
1302
self._transport.put(self._filename, StringIO(''), mode=file_mode)
1349
1304
def enable_cache(self):
1350
1305
"""Enable caching of reads."""
1370
1325
sio = StringIO()
1371
1326
data_file = GzipFile(None, mode='wb', fileobj=sio)
1373
version_id_utf8 = cache_utf8.encode(version_id)
1374
1327
data_file.writelines(chain(
1375
["version %s %d %s\n" % (version_id_utf8,
1328
["version %s %d %s\n" % (version_id.encode('utf-8'),
1379
["end %s\n" % version_id_utf8]))
1332
["end %s\n" % version_id.encode('utf-8')]))
1380
1333
data_file.close()
1381
1334
length= sio.tell()
1389
1342
:return: the offset in the data file raw_data was written.
1391
1344
assert isinstance(raw_data, str), 'data must be plain bytes'
1392
if not self._need_to_create:
1393
return self._transport.append_bytes(self._filename, raw_data)
1395
self._transport.put_bytes_non_atomic(self._filename, raw_data,
1396
create_parent_dir=self._create_parent_dir,
1397
mode=self._file_mode,
1398
dir_mode=self._dir_mode)
1399
self._need_to_create = False
1345
return self._transport.append(self._filename, StringIO(raw_data))
1402
1347
def add_record(self, version_id, digest, lines):
1403
1348
"""Write new text record to disk. Returns the position in the
1404
1349
file where it was written."""
1405
1350
size, sio = self._record_to_data(version_id, digest, lines)
1406
1351
# write to disk
1407
if not self._need_to_create:
1408
start_pos = self._transport.append_file(self._filename, sio)
1410
self._transport.put_file_non_atomic(self._filename, sio,
1411
create_parent_dir=self._create_parent_dir,
1412
mode=self._file_mode,
1413
dir_mode=self._dir_mode)
1414
self._need_to_create = False
1352
start_pos = self._transport.append(self._filename, sio)
1416
1353
if self._do_cache:
1417
1354
self._cache[version_id] = sio.getvalue()
1418
1355
return start_pos, size
1427
1364
rec = df.readline().split()
1428
1365
if len(rec) != 4:
1429
1366
raise KnitCorrupt(self._filename, 'unexpected number of elements in record header')
1430
if cache_utf8.decode(rec[1]) != version_id:
1367
if rec[1].decode('utf-8')!= version_id:
1431
1368
raise KnitCorrupt(self._filename,
1432
1369
'unexpected version, wanted %r, got %r' % (
1433
1370
version_id, rec[1]))
1442
1379
record_contents = df.readlines()
1443
1380
l = record_contents.pop()
1444
1381
assert len(record_contents) == int(rec[2])
1445
if l != 'end %s\n' % cache_utf8.encode(version_id):
1382
if l.decode('utf-8') != 'end %s\n' % version_id:
1446
1383
raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'
1447
1384
% (l, version_id))