        self._history = []
        pb = ui.ui_factory.nested_progress_bar()
        try:
            pb.update('read knit index', 0, 1)
            try:
                fp = self._transport.get(self._filename)
                try:
                    # _load_data may raise NoSuchFile if the target knit is
                    # completely empty.
                    self._load_data(fp)
                finally:
                    fp.close()
            except NoSuchFile:
                if mode != 'w' or not create:
                    raise
                elif delay_create:
                    self._need_to_create = True
                else:
                    self._transport.put_bytes_non_atomic(
                        self._filename, self.HEADER, mode=self._file_mode)
        finally:
            pb.update('read knit index', 1, 1)
            pb.finished()
    def _parse_parents(self, compressed_parents):
        """convert a list of string parent values into version ids.

        ints are looked up in the index.
        .FOO values are ghosts and converted into FOO.

        NOTE: the function is retained here for clarity, and for possible
              use in partial index reads. However bulk processing now has
              it inlined in _load_data for inner-loop optimisation.
        """
        result = []
        for value in compressed_parents:
            if value[0] == '.':
                # uncompressed reference
                result.append(cache_utf8.decode(value[1:]))
            else:
                # this is 15/4000ms faster than isinstance,
                # this function is called thousands of times a
                # second so small variations add up.
                assert value.__class__ is str
                result.append(self._history[int(value)])
        return result
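    # Illustrative sketch (not part of the original source): given the
    # compressed parent tokens from an index line, and assuming the
    # hypothetical history ['rev-1', 'rev-2'], then
    #
    #   self._parse_parents(['0', '1', '.ghost-rev'])
    #   => ['rev-1', 'rev-2', 'ghost-rev']
    #
    # i.e. plain integers index into self._history, while a leading '.'
    # marks an uncompressed (possibly ghost) reference.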
    def _load_data(self, fp):
        cache = self._cache
        history = self._history
        decode_utf8 = cache_utf8.decode

        self.check_header(fp)
        # readlines reads the whole file at once:
        # bad for transports like http, good for local disk
        # we save 60 ms doing this one change (
        # from calling readline each time to calling
        # readlines once)
        # probably what we want for nice behaviour on
        # http is an incremental readlines that yields, or
        # a check for local vs non local indexes,
        history_top = len(history) - 1
        for line in fp.readlines():
            rec = line.split()
            if len(rec) < 5 or rec[-1] != ':':
                # FIXME: in the future we should determine if it's a
                # short write - and ignore it
                # or a different failure, and raise. RBC 20060407
                continue

            parents = []
            for value in rec[4:-1]:
                if value[0] == '.':
                    # uncompressed reference
                    parents.append(decode_utf8(value[1:]))
                else:
                    parents.append(history[int(value)])

            version_id, options, pos, size = rec[:4]
            version_id = decode_utf8(version_id)

            # See self._cache_version
            # only want the _history index to reference the 1st
            # index entry for version_id
            if version_id not in cache:
                history_top += 1
                index = history_top
                history.append(version_id)
            else:
                index = cache[version_id][5]
            cache[version_id] = (version_id,
                                 options.split(','),
                                 int(pos),
                                 int(size),
                                 parents,
                                 index)
            # end self._cache_version
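    # Illustrative sketch (not part of the original source): an index line
    # as consumed by _load_data looks roughly like
    #
    #   rev-3 fulltext 1234 567 0 .ghost-rev :
    #
    # i.e. version_id, comma-separated options, byte position, byte size,
    # zero or more compressed parents, and a trailing ':' sanity marker.
    # Assuming history[0] == 'rev-1', the line above yields the cache entry
    #
    #   cache['rev-3'] = ('rev-3', ['fulltext'], 1234, 567,
    #                     ['rev-1', 'ghost-rev'], index)
    #
    # (revision ids here are hypothetical.)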
    def get_graph(self):
        """Return a list of (version_id, parents) pairs for all versions."""
        return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]
    def get_ancestry(self, versions):
        """See VersionedFile.get_ancestry."""
        # get a graph of all the mentioned versions:
        graph = {}
        pending = set(versions)
        cache = self._cache
        while pending:
            version = pending.pop()
            # trim ghosts: keep only parents present in the index
            try:
                parents = [p for p in cache[version][4] if p in cache]
            except KeyError:
                raise RevisionNotPresent(version, self._filename)
            # if not completed and not a ghost
            pending.update([p for p in parents if p not in graph])
            graph[version] = parents
        return topo_sort(graph.items())
    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        # get a graph of all the mentioned versions:
        self.check_versions_present(versions)
        graph = {}
        pending = set(versions)
        cache = self._cache
        while pending:
            version = pending.pop()
            try:
                parents = cache[version][4]
            except KeyError:
                # ghost, fake it
                graph[version] = []
            else:
                # got the parents ok
                # if not completed and not a ghost
                pending.update([p for p in parents if p not in graph])
                graph[version] = parents
        return topo_sort(graph.items())
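    # Illustrative sketch (not part of the original source) of how the two
    # ancestry methods treat ghosts. If 'rev-2' lists a parent 'ghost' that
    # is absent from the index (hypothetical revision ids), then
    #
    #   get_ancestry(['rev-2'])              # ghost silently trimmed
    #   => ['rev-1', 'rev-2']
    #   get_ancestry_with_ghosts(['rev-2'])  # ghost kept, with no parents
    #   => some topological order over 'rev-1', 'ghost' and 'rev-2'
    #
    # exact ordering depends on topo_sort.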
    def _parse_record_header(self, version_id, raw_data):
        """Parse a record header for consistency.

        :return: the header and the decompressor stream.
                 as (stream, header_record)
        """
        df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
        rec = self._check_header(version_id, df.readline())
        return df, rec

    def _check_header(self, version_id, line):
        rec = line.split()
        if len(rec) != 4:
            raise KnitCorrupt(self._filename,
                              'unexpected number of elements in record header')
        if cache_utf8.decode(rec[1]) != version_id:
            raise KnitCorrupt(self._filename,
                              'unexpected version, wanted %r, got %r'
                              % (version_id, rec[1]))
        return rec
    def _parse_record(self, version_id, data):
        # profiling notes:
        # 4168 calls in 2880 217 internal
        # 4168 calls to _parse_record_header in 2121
        # 4168 calls to readlines in 330
        df, rec = self._parse_record_header(version_id, data)
        record_contents = df.readlines()
        last_line = record_contents.pop()
        assert len(record_contents) == int(rec[2])
        if last_line != 'end %s\n' % rec[1]:
            raise KnitCorrupt(self._filename,
                              'unexpected version end line %r, wanted %r'
                              % (last_line, version_id))
        return record_contents, rec[3]
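    # Illustrative sketch (not part of the original source): the raw data
    # handed to _parse_record is a gzipped block of the form
    #
    #   version <version_id> <line-count> <digest>
    #   <line-count content lines>
    #   end <version_id>
    #
    # so rec[1] is the version id checked by _check_header, int(rec[2]) is
    # the expected number of content lines, and rec[3] is the digest that
    # _parse_record returns alongside the lines.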