        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])
        """
        result = []
        lines = iter(lines)
        next = lines.next

        cache = {}
        def cache_and_return(line):
            origin, text = line.split(' ', 1)
            return cache.setdefault(origin, origin), text

        # walk through the lines parsing.
        for header in lines:
            start, end, count = [int(n) for n in header.split(',')]
            contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
            result.append((start, end, count, contents))
        return result
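
    # Illustrative example (not in the original source): the serialized
    # delta ['0,1,2\n', 'rev-a line one\n', 'rev-b line two\n'] parses to
    # [(0, 1, 2, [('rev-a', 'line one\n'), ('rev-b', 'line two\n')])],
    # i.e. replace the line range [0, 1) with the two annotated lines.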

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                origin, text = next().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """Convert a fulltext content record into a serializable form.

        See parse_fulltext which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """Convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out
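
    # Round-trip sketch (illustrative, assuming an annotated factory f):
    #   delta = [(0, 1, 1, [('rev-a', 'new line\n')])]
    #   f.lower_line_delta(delta) == ['0,1,1\n', 'rev-a new line\n']
    # and parse_line_delta applied to that output recovers `delta`.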

        self.writable = (access_mode == 'w')
        self.delta = delta

        self._max_delta_chain = 200

        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
            access_mode, create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir, delay_create=delay_create,
            dir_mode=dir_mode)
        self._data = _KnitData(transport, relpath + DATA_SUFFIX,
            access_mode, create=create and not len(self), file_mode=file_mode,
            create_parent_dir=create_parent_dir, delay_create=delay_create,
            dir_mode=dir_mode)

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__,
                           self.transport.abspath(self.filename))

    def _check_should_delta(self, first_parents):
        """Iterate back through the parent listing, looking for a fulltext.

        This is used when we want to decide whether to add a delta or a new
        fulltext. It searches for _max_delta_chain parents. When it finds a
        fulltext parent, it sees if the total size of the deltas leading up to
        it is large enough to indicate that we want a new full text anyway.

        Return True if we should create a new delta, False if we should use a
        fulltext.
        """
        delta_size = 0
        fulltext_size = None
        delta_parents = first_parents
        for count in xrange(self._max_delta_chain):
            parent = delta_parents[0]
            method = self._index.get_method(parent)
            pos, size = self._index.get_position(parent)
            if method == 'fulltext':
                fulltext_size = size
                break
            delta_size += size
            delta_parents = self._index.get_parents(parent)
        else:
            # We couldn't find a fulltext, so we must create a new one
            return False

        return fulltext_size > delta_size
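
    # Worked example (illustrative sizes): two deltas of 1000 and 2000
    # bytes sitting below a 10000-byte fulltext give delta_size = 3000 and
    # fulltext_size = 10000; 10000 > 3000, so another delta is stored.
    # Once the accumulated deltas outweigh the fulltext, this returns
    # False and a fresh fulltext is written instead.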

    def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
        """See VersionedFile._add_delta()."""
        self._check_add(version_id, []) # should we check the lines ?

            text_map[version_id] = text
        return text_map, final_content

    def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                                pb=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        if version_ids is None:
            version_ids = self.versions()
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
        if pb is None:
            pb = progress.DummyProgress()
        # we don't care about inclusions, the caller cares.
        # but we need to setup a list of records to visit.
        # we need version_id, position, length
        version_id_records = []
        requested_versions = set(version_ids)
        # filter for available versions
        for version_id in requested_versions:
            if not self.has_version(version_id):
                raise RevisionNotPresent(version_id, self.filename)
        # get an in-component-order queue:
        for version_id in self.versions():
            if version_id in requested_versions:
                data_pos, length = self._index.get_position(version_id)
                version_id_records.append((version_id, data_pos, length))

        total = len(version_id_records)
        for version_idx, (version_id, data, sha_value) in \
                enumerate(self._data.read_records_iter(version_id_records)):
            pb.update('Walking content.', version_idx, total)
            method = self._index.get_method(version_id)

            assert method in ('fulltext', 'line-delta')
            if method == 'fulltext':
                line_iterator = self.factory.get_fulltext_content(data)
            else:
                line_iterator = self.factory.get_linedelta_content(data)
            for line in line_iterator:
                yield line

        pb.update('Walking content.', total, total)
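
    # Usage sketch (illustrative, hypothetical ids):
    #   for line in knit.iter_lines_added_or_present_in_versions(['rev-1']):
    #       process(line)
    # visits each stored record once, streaming fulltext or delta content
    # without reconstructing whole texts.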

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        version_ids = [osutils.safe_revision_id(version_id) for
                       version_id in version_ids]
        return self._index.iter_parents(version_ids)
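
    # Illustrative behaviour: if 'rev-2' (parent 'rev-1') is stored but
    # 'ghost' is not, iter_parents(['rev-2', 'ghost']) yields only
    # ('rev-2', ('rev-1',)); the missing id is silently skipped.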

    def num_versions(self):
        """See VersionedFile.num_versions()."""
        return self._index.num_versions()

    def plan_merge(self, ver_a, ver_b):
        """See VersionedFile.plan_merge."""
        ver_a = osutils.safe_revision_id(ver_a)
        ver_b = osutils.safe_revision_id(ver_b)
        ancestors_b = set(self.get_ancestry(ver_b, topo_sorted=False))

        ancestors_a = set(self.get_ancestry(ver_a, topo_sorted=False))
        annotated_a = self.annotate(ver_a)
        annotated_b = self.annotate(ver_b)
        return merge._plan_annotate_merge(annotated_a, annotated_b,
                                          ancestors_a, ancestors_b)

class _KnitComponentFile(object):
    """One of the files used to implement a knit database"""

    def __init__(self, transport, filename, mode, file_mode=None,
                 create_parent_dir=False, dir_mode=None):
        self._transport = transport
        self._filename = filename
        self._mode = mode
        self._file_mode = file_mode
        self._dir_mode = dir_mode
        self._create_parent_dir = create_parent_dir
        self._need_to_create = False

    def _full_path(self):
        """Return the full path to this file."""
        return self._transport.base + self._filename

    def write_header(self):
        if self._transport.append(self._filename, StringIO(self.HEADER),
                                  mode=self._file_mode):
            raise KnitCorrupt(self._filename, 'misaligned after writing header')

    def check_header(self, fp):
        line = fp.readline()
        if line == '':
            # An empty file can actually be treated as though the file doesn't
            # exist yet.
            raise errors.NoSuchFile(self._full_path())
        if line != self.HEADER:
            raise KnitHeaderError(badline=line,
                                  filename=self._transport.abspath(self._filename))

    def commit(self):
        """Commit is a nop."""

    def _cache_version(self, version_id, options, pos, size, parents):
        # only want the _history index to reference the 1st
        # index entry for version_id
        if version_id not in self._cache:
            index = len(self._history)
            self._history.append(version_id)
        else:
            index = self._cache[version_id][5]
        self._cache[version_id] = (version_id,
                                   options,
                                   pos,
                                   size,
                                   parents,
                                   index)

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False, dir_mode=None):
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
        except NoSuchFile:
            if mode != 'w' or not create:
                raise
            elif delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(
                    self._filename, self.HEADER, mode=self._file_mode)

    def _parse_parents(self, compressed_parents):
        """Convert a list of string parent values into version ids.

        ints are looked up in the index.
        .FOO values are ghosts and converted into FOO.

        NOTE: the function is retained here for clarity, and for possible
              use in partial index reads. However bulk processing now has
              it inlined for inner-loop optimisation.
        """
        result = []
        for value in compressed_parents:
            if value[0] == '.':
                # uncompressed reference
                result.append(value[1:])
            else:
                # this is 15/4000ms faster than isinstance,
                # this function is called thousands of times a
                # second so small variations add up.
                assert value.__class__ is str
                result.append(self._history[int(value)])
        return result

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        # get a graph of all the mentioned versions:
        graph = {}
        pending = set(versions)
        cache = self._cache
        while pending:
            version = pending.pop()
            try:
                # trim ghosts: only keep parents that are themselves present
                parents = [p for p in cache[version][4] if p in cache]
            except KeyError:
                raise RevisionNotPresent(version, self._filename)
            # if not completed and not a ghost
            pending.update([p for p in parents if p not in graph])
            graph[version] = parents
        if not topo_sorted:
            return graph.keys()
        return topo_sort(graph.items())
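
    # Ghost-trimming sketch (illustrative): if 'rev-2' records parents
    # ('rev-1', 'ghost') and only 'rev-1' is cached, the ghost is dropped
    # and get_ancestry(['rev-2']) returns ['rev-1', 'rev-2'] in
    # topological order.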

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        # get a graph of all the mentioned versions:
        self.check_versions_present(versions)
        cache = self._cache
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            try:
                parents = cache[version][4]
            except KeyError:
                # ghost, fake it
                graph[version] = []
            else:
                # if not completed
                pending.update([p for p in parents if p not in graph])
                graph[version] = parents
        return topo_sort(graph.items())

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        for version_id in version_ids:
            try:
                yield version_id, tuple(self.get_parents(version_id))
            except KeyError:
                pass

    def num_versions(self):
        return len(self._history)

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. Not topologically sorted."""
        return self._history

    def idx_to_name(self, idx):
        return self._history[idx]

    def lookup(self, version_id):
        assert version_id in self._cache
        return self._cache[version_id][5]

    def _version_list_to_index(self, versions):
        result_list = []
        cache = self._cache
        for version in versions:
            if version in cache:
                # -- inlined lookup() --
                result_list.append(str(cache[version][5]))
                # -- end lookup () --
            else:
                result_list.append('.' + version)
        return ' '.join(result_list)

    def add_version(self, version_id, options, pos, size, parents):
        """Add a version record to the index."""
        self.add_versions(((version_id, options, pos, size, parents),))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        :param versions: a list of tuples:
                         (version_id, options, pos, size, parents).
        """
        lines = []
        orig_history = self._history[:]
        orig_cache = self._cache.copy()

        try:
            for version_id, options, pos, size, parents in versions:
                line = "\n%s %s %s %s %s :" % (version_id,
                                               ','.join(options),
                                               pos,
                                               size,
                                               self._version_list_to_index(parents))
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if not self._need_to_create:
                self._transport.append_bytes(self._filename, ''.join(lines))
            else:
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(self._filename, sio,
                                    create_parent_dir=self._create_parent_dir,
                                    mode=self._file_mode,
                                    dir_mode=self._dir_mode)
                self._need_to_create = False
        except:
            # If any problems happen, restore the original values and re-raise
            self._history = orig_history
            self._cache = orig_cache
            raise
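
    # Index line sketch (illustrative values): adding
    #   ('rev-2', ['line-delta'], 120, 80, ['rev-1', 'ghost'])
    # appends a line like "rev-2 line-delta 120 80 0 .ghost :", where 0 is
    # rev-1's position in _history and '.ghost' names an absent parent.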

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return data position and size of specified version."""
        entry = self._cache[version_id]
        return entry[2], entry[3]

    def get_method(self, version_id):
        """Return compression method of specified version."""
        options = self._cache[version_id][1]
        if 'fulltext' in options:
            return 'fulltext'
        else:
            assert 'line-delta' in options
            return 'line-delta'

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        cache = self._cache
        for version_id in version_ids:
            if version_id not in cache:
                raise RevisionNotPresent(version_id, self._filename)

class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex."""

    def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param parents: If True, record knit parents; if not, do not record
            parents.
        """
        self._graph_index = graph_index
        self._deltas = deltas
        self._add_callback = add_callback
        self._parents = parents
        if deltas and not parents:
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")
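
    # Construction sketch (illustrative; `graph_index` and the callback are
    # assumptions, e.g. an in-memory GraphIndex builder and its add-nodes
    # method):
    #   knit_index = KnitGraphIndex(graph_index, deltas=True, parents=True,
    #                               add_callback=builder_add_nodes)
    # Omitting add_callback leaves the index read-only.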

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        :param keys: An iterable of index keys; 1-tuples.
        """
        keys = set(keys)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(keys):
                yield node
                found_keys.add(node[0])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], ()
                found_keys.add(node[0])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def _present_keys(self, version_ids):
        return set([
            node[0] for node in self._get_entries(version_ids)])

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices."""
        wanted_keys = self._version_ids_to_keys(versions)
        present_keys = self._present_keys(wanted_keys)
        missing = set(wanted_keys).difference(present_keys)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)
        return list(self._keys_to_version_ids(present_keys))

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be an index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        ghosts = set()
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            found = set()
            pending = set()
            for (key, value, node_refs) in new_nodes:
                # don't ask for ghosts - otherwise
                # we can end up looping with pending
                # being entirely ghosted.
                graph[key] = [parent for parent in node_refs[0]
                    if parent not in ghosts]
                # queue parents
                for parent in graph[key]:
                    # don't examine known nodes again
                    if parent not in graph:
                        pending.add(parent)
                found.add(key)
            ghosts.update(this_iteration.difference(found))
        if versions.difference(graph):
            raise RevisionNotPresent(versions.difference(graph).pop(), self)
        if topo_sorted:
            result_keys = topo_sort(graph.items())
        else:
            result_keys = graph.iterkeys()
        return [key[0] for key in result_keys]

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be an index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            pending = set()
            for (key, value, node_refs) in new_nodes:
                graph[key] = node_refs[0]
                # queue parents
                for parent in graph[key]:
                    # don't examine known nodes again
                    if parent not in graph:
                        pending.add(parent)
            missing_versions = this_iteration.difference(graph)
            missing_needed = versions.intersection(missing_versions)
            if missing_needed:
                raise RevisionNotPresent(missing_needed.pop(), self)
            for missing_version in missing_versions:
                # add a key, no parents
                graph[missing_version] = []
                pending.discard(missing_version) # don't look for it
        result_keys = topo_sort(graph.items())
        return [key[0] for key in result_keys]

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        if not self._parents:
            return [(key, ()) for key in self.get_versions()]
        result = []
        for key, value, refs in self._graph_index.iter_all_entries():
            result.append((key[0], tuple([ref[0] for ref in refs[0]])))
        return result

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        if self._parents:
            all_nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
            all_parents = set()
            present_parents = set()
            for node in all_nodes:
                all_parents.update(node[2][0])
                # any node we are querying must be present
                present_parents.add(node[0])
            unknown_parents = all_parents.difference(present_parents)
            present_parents.update(self._present_keys(unknown_parents))
            for node in all_nodes:
                parents = []
                for parent in node[2][0]:
                    if parent in present_parents:
                        parents.append(parent[0])
                yield node[0][0], tuple(parents)
        else:
            for node in self._get_entries(self._version_ids_to_keys(version_ids)):
                yield node[0][0], ()

    def num_versions(self):
        return len(list(self._graph_index.iter_all_entries()))

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. Not topologically sorted."""
        return [node[0][0] for node in self._graph_index.iter_all_entries()]

    def has_version(self, version_id):
        """True if the version is in the index."""
        return len(self._present_keys(self._version_ids_to_keys([version_id]))) == 1

    def _keys_to_version_ids(self, keys):
        return tuple(key[0] for key in keys)

    def get_position(self, version_id):
        """Return data position and size of specified version."""
        bits = self._get_node(version_id)[1][1:].split(' ')
        return int(bits[0]), int(bits[1])

    def get_method(self, version_id):
        """Return compression method of specified version."""
        if not self._deltas:
            return 'fulltext'
        return self._parent_compression(self._get_node(version_id)[2][1])

    def _parent_compression(self, reference_list):
        # use the second reference list to decide if this is delta'd or not.
        if len(reference_list):
            return 'line-delta'
        else:
            return 'fulltext'

    def _get_node(self, version_id):
        return list(self._get_entries(self._version_ids_to_keys([version_id])))[0]

    def get_options(self, version_id):
        """Return a list of the option strings for the specified version,
        e.g. ['fulltext', 'no-eol'].
        """
        node = self._get_node(version_id)
        if not self._deltas:
            options = ['fulltext']
        else:
            options = [self._parent_compression(node[2][1])]
        if node[1][0] == 'N':
            options.append('no-eol')
        return options

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        parents = list(self.iter_parents([version_id]))
        if not parents:
            # missing key
            raise errors.RevisionNotPresent(version_id, self)
        return parents[0][1]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        nodes = list(self._get_entries(self._version_ids_to_keys([version_id]),
            check_present=True))
        if not self._parents:
            return ()
        return self._keys_to_version_ids(nodes[0][2][0])

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        keys = self._version_ids_to_keys(version_ids)
        present = self._present_keys(keys)
        missing = keys.difference(present)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)

    def add_version(self, version_id, options, pos, size, parents):
        """Add a version record to the index."""
        return self.add_versions(((version_id, options, pos, size, parents),))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param versions: a list of tuples:
                         (version_id, options, pos, size, parents).
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anywhere. if there are, this will fail.
        keys = {}
        for (version_id, options, pos, size, parents) in versions:
            # index keys are tuples:
            key = (version_id, )
            parents = tuple((parent, ) for parent in parents)
            if 'no-eol' in options:
                value = 'N'
            else:
                value = ' '
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if self._parents:
                if self._deltas:
                    if 'line-delta' in options:
                        node_refs = (parents, (parents[0],))
                    else:
                        node_refs = (parents, ())
                else:
                    node_refs = (parents, )
            else:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            keys[key] = (value, node_refs)
        present_nodes = self._get_entries(keys)
        for (key, value, node_refs) in present_nodes:
            if (value, node_refs) != keys[key]:
                raise KnitCorrupt(self, "inconsistent details in add_versions"
                    ": %s %s" % ((value, node_refs), keys[key]))
            del keys[key]
        result = []
        if self._parents:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value, node_refs))
        else:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value))
        self._add_callback(result)
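
    # Node-preparation sketch (illustrative): in a delta-capable,
    # parent-tracking index, ('rev-2', ['line-delta'], 120, 80, ['rev-1'])
    # becomes the node
    #   (('rev-2',), ' 120 80', ((('rev-1',),), (('rev-1',),)))
    # handed to self._add_callback.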

    def _version_ids_to_keys(self, version_ids):
        return set((version_id, ) for version_id in version_ids)

class _KnitData(_KnitComponentFile):
    """Contents of the knit data file"""

    HEADER = "# bzr knit data 8\n"

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False,
                 dir_mode=None):
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._checked = False
        # TODO: jam 20060713 conceptually, this could spill to disk
        #       if the cached size gets larger than a certain amount

    def _parse_record_header(self, version_id, raw_data):
        """Parse a record header for consistency.

        :return: the header and the decompressor stream,
                 as (stream, header_record)
        """
        df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
        try:
            rec = self._check_header(version_id, df.readline())
        except Exception, e:
            raise KnitCorrupt(self._filename,
                              "While reading {%s} got %s(%s)"
                              % (version_id, e.__class__.__name__, str(e)))
        return df, rec

    def _check_header(self, version_id, line):
        rec = line.split()
        if len(rec) != 4:
            raise KnitCorrupt(self._filename,
                              'unexpected number of elements in record header')
        if rec[1] != version_id:
            raise KnitCorrupt(self._filename,
                              'unexpected version, wanted %r, got %r'
                              % (version_id, rec[1]))
        return rec
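
    # Header sketch (illustrative): a well-formed record header line reads
    #   'version rev-1 3 0123456789abcdef...\n'
    # i.e. four fields (marker, version id, content line count, sha1), so
    # anything that does not split into exactly 4 elements naming the
    # expected version is rejected as corrupt.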

    def _parse_record(self, version_id, data):
        # profiling notes:
        # 4168 calls in 2880 217 internal
        # 4168 calls to _parse_record_header in 2121
        # 4168 calls to readlines in 330
        df = GzipFile(mode='rb', fileobj=StringIO(data))

        try:
            record_contents = df.readlines()
        except Exception, e:
            raise KnitCorrupt(self._filename,
                              "While reading {%s} got %s(%s)"
                              % (version_id, e.__class__.__name__, str(e)))
        header = record_contents.pop(0)
        rec = self._check_header(version_id, header)

        last_line = record_contents.pop()
        if len(record_contents) != int(rec[2]):
            raise KnitCorrupt(self._filename,
                              'incorrect number of lines %s != %s'
                              ' for version {%s}'
                              % (len(record_contents), int(rec[2]),
                                 version_id))
        if last_line != 'end %s\n' % rec[1]:
            raise KnitCorrupt(self._filename,
                              'unexpected version end line %r, wanted %r'
                              % (last_line, version_id))

        return record_contents, rec[3]
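
    # Record layout sketch (illustrative): once decompressed, a record is
    #   version rev-1 2 <sha1>\n
    #   <line 1>\n
    #   <line 2>\n
    #   end rev-1\n
    # _parse_record pops the header and the trailing 'end' line, checks the
    # line count, and returns the content lines plus the stored sha1.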