156
157
class _KnitFactory(object):
    """Base factory for creating content objects."""

    def make(self, lines, version_id):
        """Wrap ``lines`` in a KnitContent, pairing each with ``version_id``.

        :param lines: sequence of text lines; its length is taken with
            ``len()``.
        :param version_id: the single id paired with every line.
        :return: a KnitContent built from ``(version_id, line)`` pairs.
        """
        num_lines = len(lines)
        return KnitContent(zip([version_id] * num_lines, lines))
164
165
class KnitAnnotateFactory(_KnitFactory):
169
def parse_fulltext(self, content, version):
170
def parse_fulltext(self, content, version_id):
170
171
"""Convert fulltext to internal representation
172
173
fulltext content is of the format
174
175
internal representation is of the format:
175
176
(revid, plaintext)
177
lines = [line.split(' ', 1) for line in content]
178
# TODO: jam 20070209 The tests expect this to be returned as tuples,
179
# but the code itself doesn't really depend on that.
180
# Figure out a way to not require the overhead of turning the
181
# list back into tuples.
182
lines = [tuple(line.split(' ', 1)) for line in content]
178
183
return KnitContent(lines)
180
185
def parse_line_delta_iter(self, lines, version_id=None):
    """Return an iterator over the parsed form of a line delta.

    Thin wrapper around ``parse_line_delta``.

    :param lines: the serialized line delta.
    :param version_id: forwarded unchanged to ``parse_line_delta``.  It
        defaults to None so existing one-argument callers keep working.
    :return: an iterator over whatever ``parse_line_delta`` returns.
    """
    # parse_line_delta is declared as (lines, version_id); calling it with
    # ``lines`` alone raised TypeError.
    # NOTE(review): presumably parse_line_delta tolerates version_id=None --
    # its full body is not visible here; confirm.
    return iter(self.parse_line_delta(lines, version_id))
183
def parse_line_delta(self, lines, version):
188
def parse_line_delta(self, lines, version_id):
184
189
"""Convert a line based delta into internal representation.
186
191
line delta is in the form of:
197
202
# walk through the lines parsing.
198
203
for header in lines:
199
204
start, end, count = [int(n) for n in header.split(',')]
200
contents = [next().split(' ', 1) for i in xrange(count)]
205
contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
201
206
result.append((start, end, count, contents))
226
231
see parse_fulltext which this inverts.
228
encode_utf8 = cache_utf8.encode
229
return ['%s %s' % (encode_utf8(o), t) for o, t in content._lines]
233
# TODO: jam 20070209 We only do the caching thing to make sure that
234
# the origin is a valid utf-8 line, eventually we could remove it
235
get_cached_utf8 = cache_utf8.get_cached_utf8
236
return ['%s %s' % (get_cached_utf8(o), t) for o, t in content._lines]
231
238
def lower_line_delta(self, delta):
232
239
"""convert a delta into a serializable form.
234
241
See parse_line_delta which this inverts.
236
encode_utf8 = cache_utf8.encode
243
# TODO: jam 20070209 We only do the caching thing to make sure that
244
# the origin is a valid utf-8 line, eventually we could remove it
245
get_cached_utf8 = cache_utf8.get_cached_utf8
238
247
for start, end, c, lines in delta:
239
248
out.append('%d,%d,%d\n' % (start, end, c))
240
out.extend(encode_utf8(origin) + ' ' + text
249
out.extend(get_cached_utf8(origin) + ' ' + text
241
250
for origin, text in lines)
248
257
annotated = False
250
def parse_fulltext(self, content, version_id):
    """This parses an unannotated fulltext.

    Note that this is not a noop - the internal representation
    has (versionid, line) - it's just a constant versionid.

    :param content: the fulltext lines.
    :param version_id: the id handed to ``make`` alongside the lines.
    :return: whatever ``self.make(content, version_id)`` returns.
    """
    return self.make(content, version_id)
258
def parse_line_delta_iter(self, lines, version):
267
def parse_line_delta_iter(self, lines, version_id):
260
269
num_lines = len(lines)
261
270
while cur < num_lines:
262
271
header = lines[cur]
264
273
start, end, c = [int(n) for n in header.split(',')]
265
yield start, end, c, zip([version] * c, lines[cur:cur+c])
274
yield start, end, c, zip([version_id] * c, lines[cur:cur+c])
268
def parse_line_delta(self, lines, version_id):
    """Parse a line delta eagerly.

    :param lines: the serialized line delta.
    :param version_id: passed through to ``parse_line_delta_iter``.
    :return: a list holding every item produced by
        ``self.parse_line_delta_iter(lines, version_id)``.
    """
    return list(self.parse_line_delta_iter(lines, version_id))
271
280
def get_fulltext_content(self, lines):
272
281
"""Extract just the content lines from a fulltext."""
491
500
return KnitVersionedFile(name, transport, factory=self.factory,
492
501
delta=self.delta, create=True)
494
def _fix_parents(self, version, new_parents):
503
def _fix_parents(self, version_id, new_parents):
495
504
"""Fix the parents list for version.
497
506
This is done by appending a new version to the index
499
508
the parents list must be a superset of the current
502
current_values = self._index._cache[version]
511
current_values = self._index._cache[version_id]
503
512
assert set(current_values[4]).difference(set(new_parents)) == set()
504
self._index.add_version(version,
513
self._index.add_version(version_id,
505
514
current_values[1],
506
515
current_values[2],
507
516
current_values[3],
510
519
def get_delta(self, version_id):
511
520
"""Get a delta for constructing version from some other version."""
521
version_id = osutils.safe_revision_id(version_id)
512
522
self.check_not_reserved_id(version_id)
513
523
if not self.has_version(version_id):
514
524
raise RevisionNotPresent(version_id, self.filename)
521
531
data_pos, data_size = self._index.get_position(version_id)
522
532
data, sha1 = self._data.read_records(((version_id, data_pos, data_size),))[version_id]
523
version_idx = self._index.lookup(version_id)
524
533
noeol = 'no-eol' in self._index.get_options(version_id)
525
534
if 'fulltext' == self._index.get_method(version_id):
526
new_content = self.factory.parse_fulltext(data, version_idx)
535
new_content = self.factory.parse_fulltext(data, version_id)
527
536
if parent is not None:
528
537
reference_content = self._get_content(parent)
529
538
old_texts = reference_content.text()
533
542
delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
534
543
return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)
536
delta = self.factory.parse_line_delta(data, version_idx)
545
delta = self.factory.parse_line_delta(data, version_id)
537
546
return parent, sha1, noeol, delta
539
548
def get_graph_with_ghosts(self):
544
553
def get_sha1(self, version_id):
545
554
"""See VersionedFile.get_sha1()."""
555
version_id = osutils.safe_revision_id(version_id)
546
556
record_map = self._get_record_map([version_id])
547
557
method, content, digest, next = record_map[version_id]
555
565
def has_ghost(self, version_id):
556
566
"""True if there is a ghost reference in the file to version_id."""
567
version_id = osutils.safe_revision_id(version_id)
557
568
# maybe we have it
558
569
if self.has_version(version_id):
573
584
def has_version(self, version_id):
    """See VersionedFile.has_version."""
    # The id is passed through osutils.safe_revision_id first; the index
    # is then queried with the value that call returns.
    version_id = osutils.safe_revision_id(version_id)
    return self._index.has_version(version_id)
577
589
__contains__ = has_version
791
803
def get_line_list(self, version_ids):
792
804
"""Return the texts of listed versions as a list of strings."""
805
version_ids = [osutils.safe_revision_id(v) for v in version_ids]
793
806
for version_id in version_ids:
794
807
self.check_not_reserved_id(version_id)
795
808
text_map, content_map = self._get_content_maps(version_ids)
825
838
if component_id in content_map:
826
839
content = content_map[component_id]
828
version_idx = self._index.lookup(component_id)
829
841
if method == 'fulltext':
830
842
assert content is None
831
content = self.factory.parse_fulltext(data, version_idx)
843
content = self.factory.parse_fulltext(data, version_id)
832
844
elif method == 'line-delta':
833
delta = self.factory.parse_line_delta(data, version_idx)
845
delta = self.factory.parse_line_delta(data, version_id)
834
846
content = content.copy()
835
847
content._lines = self._apply_delta(content._lines,
856
868
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
857
869
if version_ids is None:
858
870
version_ids = self.versions()
872
version_ids = [osutils.safe_revision_id(v) for v in version_ids]
860
874
pb = progress.DummyProgress()
861
875
# we don't care about inclusions, the caller cares.
878
892
enumerate(self._data.read_records_iter(version_id_records)):
879
893
pb.update('Walking content.', version_idx, total)
880
894
method = self._index.get_method(version_id)
881
version_idx = self._index.lookup(version_id)
883
896
assert method in ('fulltext', 'line-delta')
884
897
if method == 'fulltext':
899
912
def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    Generator: yields ``(origin, text)`` pairs taken from the
    ``annotate_iter()`` of the content fetched for ``version_id``.
    """
    # The id is passed through osutils.safe_revision_id before the
    # content lookup.
    version_id = osutils.safe_revision_id(version_id)
    content = self._get_content(version_id)
    for origin, text in content.annotate_iter():
        yield origin, text
915
930
def get_parents_with_ghosts(self, version_id):
916
931
"""See VersionedFile.get_parents."""
932
version_id = osutils.safe_revision_id(version_id)
918
934
return self._index.get_parents_with_ghosts(version_id)
959
977
def plan_merge(self, ver_a, ver_b):
960
978
"""See VersionedFile.plan_merge."""
979
ver_a = osutils.safe_revision_id(ver_a)
980
ver_b = osutils.safe_revision_id(ver_b)
961
981
ancestors_b = set(self.get_ancestry(ver_b))
962
982
def status_a(revision, text):
963
983
if revision in ancestors_b:
1121
1141
# so - wc -l of a knit index is != the number of unique names
1123
1143
self._history = []
1124
decode_utf8 = cache_utf8.decode
1125
1144
pb = ui.ui_factory.nested_progress_bar()
1127
1146
pb.update('read knit index', 0, 1)
1172
1192
for value in rec[4:-1]:
1173
1193
if value[0] == '.':
1174
1194
# uncompressed reference
1175
parents.append(value[1:])
1195
parent_id = get_cached_utf8(value[1:])
1177
parents.append(history[int(value)])
1197
parent_id = history[int(value)]
1198
parents.append(parent_id)
1179
1200
version_id, options, pos, size = rec[:4]
1201
version_id = get_cached_utf8(version_id)
1181
1203
# See self._cache_version
1182
1204
# only want the _history index to reference the 1st
1252
1274
return self._cache[version_id][5]
1254
1276
def _version_list_to_index(self, versions):
1255
encode_utf8 = cache_utf8.encode
1277
# TODO: jam 20070209 We only do the caching thing to make sure that
1278
# what we have in memory is already a proper utf-8 string
1279
# Eventually we should be able to write out the index without
1280
# doing any sort of encode step
1281
get_cached_utf8 = cache_utf8.get_cached_utf8
1256
1282
result_list = []
1257
1283
cache = self._cache
1258
1284
for version in versions:
1261
1287
result_list.append(str(cache[version][5]))
1262
1288
# -- end lookup () --
1264
result_list.append('.' + encode_utf8(version))
1290
result_list.append('.' + get_cached_utf8(version))
1265
1291
return ' '.join(result_list)
1267
1293
def add_version(self, version_id, options, pos, size, parents):
1275
1301
(version_id, options, pos, size, parents).
1278
encode_utf8 = cache_utf8.encode
1304
# TODO: jam 20070209 get_cached_utf8 is just used to verify the
1305
# version_ids are indeed utf-8 eventually these calls can be
1307
get_cached_utf8 = cache_utf8.get_cached_utf8
1279
1308
orig_history = self._history[:]
1280
1309
orig_cache = self._cache.copy()
1283
1312
for version_id, options, pos, size, parents in versions:
1284
line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),
1313
line = "\n%s %s %s %s %s :" % (get_cached_utf8(version_id),
1285
1314
','.join(options),
1395
1424
sio = StringIO()
1396
1425
data_file = GzipFile(None, mode='wb', fileobj=sio)
1398
version_id_utf8 = cache_utf8.encode(version_id)
1427
version_id_utf8 = cache_utf8.get_cached_utf8(version_id)
1399
1428
data_file.writelines(chain(
1400
1429
["version %s %d %s\n" % (version_id_utf8,
1457
1486
if len(rec) != 4:
1458
1487
raise KnitCorrupt(self._filename,
1459
1488
'unexpected number of elements in record header')
1460
if cache_utf8.decode(rec[1]) != version_id:
1489
if rec[1] != version_id:
1461
1490
raise KnitCorrupt(self._filename,
1462
1491
'unexpected version, wanted %r, got %r'
1463
1492
% (version_id, rec[1]))