156
157
class _KnitFactory(object):
    """Base factory for creating content objects."""

    def make(self, lines, version_id):
        """Wrap ``lines`` in a KnitContent, pairing each line with one id.

        :param lines: the text lines to wrap.
        :param version_id: the id attached to every line.
        :return: a KnitContent built from ``(version_id, line)`` tuples,
            one per entry of ``lines`` and in the same order.
        """
        # Build the pairs directly rather than zipping against a throwaway
        # list of repeated ids; the resulting list of tuples is the same.
        return KnitContent([(version_id, line) for line in lines])
164
165
class KnitAnnotateFactory(_KnitFactory):
169
def parse_fulltext(self, content, version):
170
def parse_fulltext(self, content, version_id):
170
171
"""Convert fulltext to internal representation
172
173
fulltext content is of the format
174
175
internal representation is of the format:
175
176
(revid, plaintext)
177
decode_utf8 = cache_utf8.decode
180
origin, text = line.split(' ', 1)
181
lines.append((decode_utf8(origin), text))
178
# TODO: jam 20070209 The tests expect this to be returned as tuples,
179
# but the code itself doesn't really depend on that.
180
# Figure out a way to not require the overhead of turning the
181
# list back into tuples.
182
lines = [tuple(line.split(' ', 1)) for line in content]
182
183
return KnitContent(lines)
184
185
def parse_line_delta_iter(self, lines, version_id=None):
    """Return an iterator over the hunks produced by parse_line_delta.

    :param lines: the serialised line delta to parse.
    :param version_id: forwarded unchanged to ``parse_line_delta``.
        Optional so that existing single-argument callers keep working.
    :return: an iterator over whatever ``parse_line_delta`` returns.
    """
    # parse_line_delta takes (lines, version_id); calling it with only
    # ``lines`` raised TypeError, so the id is always passed through.
    return iter(self.parse_line_delta(lines, version_id))
187
def parse_line_delta(self, lines, version):
188
def parse_line_delta(self, lines, version_id):
188
189
"""Convert a line based delta into internal representation.
190
191
line delta is in the form of:
194
195
internal representation is
195
196
(start, end, count, [1..count tuples (revid, newline)])
197
decode_utf8 = cache_utf8.decode
199
199
lines = iter(lines)
200
200
next = lines.next
203
def cache_and_return(line):
204
origin, text = line.split(' ', 1)
205
return cache.setdefault(origin, origin), text
201
207
# walk through the lines parsing.
202
208
for header in lines:
203
209
start, end, count = [int(n) for n in header.split(',')]
207
origin, text = next().split(' ', 1)
209
contents.append((decode_utf8(origin), text))
210
contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
210
211
result.append((start, end, count, contents))
235
236
see parse_fulltext which this inverts.
237
encode_utf8 = cache_utf8.encode
238
return ['%s %s' % (encode_utf8(o), t) for o, t in content._lines]
238
# TODO: jam 20070209 We only do the caching thing to make sure that
239
# the origin is a valid utf-8 line, eventually we could remove it
240
return ['%s %s' % (o, t) for o, t in content._lines]
240
242
def lower_line_delta(self, delta):
241
243
"""convert a delta into a serializable form.
243
245
See parse_line_delta which this inverts.
245
encode_utf8 = cache_utf8.encode
247
# TODO: jam 20070209 We only do the caching thing to make sure that
248
# the origin is a valid utf-8 line, eventually we could remove it
247
250
for start, end, c, lines in delta:
248
251
out.append('%d,%d,%d\n' % (start, end, c))
249
out.extend(encode_utf8(origin) + ' ' + text
252
out.extend(origin + ' ' + text
250
253
for origin, text in lines)
257
260
annotated = False
259
def parse_fulltext(self, content, version):
262
def parse_fulltext(self, content, version_id):
260
263
"""This parses an unannotated fulltext.
262
265
Note that this is not a noop - the internal representation
263
266
has (versionid, line) - it's just a constant versionid.
265
return self.make(content, version)
268
return self.make(content, version_id)
267
def parse_line_delta_iter(self, lines, version):
270
def parse_line_delta_iter(self, lines, version_id):
269
272
num_lines = len(lines)
270
273
while cur < num_lines:
271
274
header = lines[cur]
273
276
start, end, c = [int(n) for n in header.split(',')]
274
yield start, end, c, zip([version] * c, lines[cur:cur+c])
277
yield start, end, c, zip([version_id] * c, lines[cur:cur+c])
277
def parse_line_delta(self, lines, version_id):
    """Parse a line delta into a list of hunks.

    :param lines: the serialised line delta to parse.
    :param version_id: passed through to ``parse_line_delta_iter``.
    :return: a list holding every item yielded by
        ``parse_line_delta_iter(lines, version_id)``.
    """
    # Only the version_id spelling is kept; the earlier ``version``
    # definition was shadowed by this one and never reachable.
    return list(self.parse_line_delta_iter(lines, version_id))
280
283
def get_fulltext_content(self, lines):
281
284
"""Extract just the content lines from a fulltext."""
500
503
return KnitVersionedFile(name, transport, factory=self.factory,
501
504
delta=self.delta, create=True)
503
def _fix_parents(self, version, new_parents):
506
def _fix_parents(self, version_id, new_parents):
504
507
"""Fix the parents list for version.
506
509
This is done by appending a new version to the index
508
511
the parents list must be a superset of the current
511
current_values = self._index._cache[version]
514
current_values = self._index._cache[version_id]
512
515
assert set(current_values[4]).difference(set(new_parents)) == set()
513
self._index.add_version(version,
516
self._index.add_version(version_id,
514
517
current_values[1],
515
518
current_values[2],
516
519
current_values[3],
519
522
def get_delta(self, version_id):
520
523
"""Get a delta for constructing version from some other version."""
524
version_id = osutils.safe_revision_id(version_id)
521
525
self.check_not_reserved_id(version_id)
522
526
if not self.has_version(version_id):
523
527
raise RevisionNotPresent(version_id, self.filename)
530
534
data_pos, data_size = self._index.get_position(version_id)
531
535
data, sha1 = self._data.read_records(((version_id, data_pos, data_size),))[version_id]
532
version_idx = self._index.lookup(version_id)
533
536
noeol = 'no-eol' in self._index.get_options(version_id)
534
537
if 'fulltext' == self._index.get_method(version_id):
535
new_content = self.factory.parse_fulltext(data, version_idx)
538
new_content = self.factory.parse_fulltext(data, version_id)
536
539
if parent is not None:
537
540
reference_content = self._get_content(parent)
538
541
old_texts = reference_content.text()
542
545
delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
543
546
return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)
545
delta = self.factory.parse_line_delta(data, version_idx)
548
delta = self.factory.parse_line_delta(data, version_id)
546
549
return parent, sha1, noeol, delta
548
551
def get_graph_with_ghosts(self):
553
556
def get_sha1(self, version_id):
554
557
"""See VersionedFile.get_sha1()."""
558
version_id = osutils.safe_revision_id(version_id)
555
559
record_map = self._get_record_map([version_id])
556
560
method, content, digest, next = record_map[version_id]
564
568
def has_ghost(self, version_id):
565
569
"""True if there is a ghost reference in the file to version_id."""
570
version_id = osutils.safe_revision_id(version_id)
566
571
# maybe we have it
567
572
if self.has_version(version_id):
582
587
def has_version(self, version_id):
    """Report whether the index knows about ``version_id``.

    See VersionedFile.has_version.
    """
    # The id goes through osutils.safe_revision_id before the index lookup.
    return self._index.has_version(osutils.safe_revision_id(version_id))
586
592
__contains__ = has_version
738
744
# I/O and the time spent applying deltas.
739
745
delta = self._check_should_delta(present_parents)
747
assert isinstance(version_id, str)
741
748
lines = self.factory.make(lines, version_id)
742
749
if delta or (self.factory.annotated and len(present_parents) > 0):
743
750
# Merge annotations from parent texts if so is needed.
800
807
def get_line_list(self, version_ids):
801
808
"""Return the texts of listed versions as a list of strings."""
809
version_ids = [osutils.safe_revision_id(v) for v in version_ids]
802
810
for version_id in version_ids:
803
811
self.check_not_reserved_id(version_id)
804
812
text_map, content_map = self._get_content_maps(version_ids)
834
842
if component_id in content_map:
835
843
content = content_map[component_id]
837
version_idx = self._index.lookup(component_id)
838
845
if method == 'fulltext':
839
846
assert content is None
840
content = self.factory.parse_fulltext(data, version_idx)
847
content = self.factory.parse_fulltext(data, version_id)
841
848
elif method == 'line-delta':
842
delta = self.factory.parse_line_delta(data, version_idx)
849
delta = self.factory.parse_line_delta(data, version_id)
843
850
content = content.copy()
844
851
content._lines = self._apply_delta(content._lines,
865
872
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
866
873
if version_ids is None:
867
874
version_ids = self.versions()
876
version_ids = [osutils.safe_revision_id(v) for v in version_ids]
869
878
pb = progress.DummyProgress()
870
879
# we don't care about inclusions, the caller cares.
887
896
enumerate(self._data.read_records_iter(version_id_records)):
888
897
pb.update('Walking content.', version_idx, total)
889
898
method = self._index.get_method(version_id)
890
version_idx = self._index.lookup(version_id)
892
900
assert method in ('fulltext', 'line-delta')
893
901
if method == 'fulltext':
908
916
def annotate_iter(self, version_id):
909
917
"""See VersionedFile.annotate_iter."""
918
version_id = osutils.safe_revision_id(version_id)
910
919
content = self._get_content(version_id)
911
920
for origin, text in content.annotate_iter():
912
921
yield origin, text
924
934
def get_parents_with_ghosts(self, version_id):
925
935
"""See VersionedFile.get_parents."""
936
version_id = osutils.safe_revision_id(version_id)
927
938
return self._index.get_parents_with_ghosts(version_id)
968
981
def plan_merge(self, ver_a, ver_b):
969
982
"""See VersionedFile.plan_merge."""
983
ver_a = osutils.safe_revision_id(ver_a)
984
ver_b = osutils.safe_revision_id(ver_b)
970
985
ancestors_b = set(self.get_ancestry(ver_b))
971
986
def status_a(revision, text):
972
987
if revision in ancestors_b:
1175
1189
for value in rec[4:-1]:
1176
1190
if value[0] == '.':
1177
1191
# uncompressed reference
1178
parents.append(decode_utf8(value[1:]))
1192
parent_id = value[1:]
1180
parents.append(history[int(value)])
1194
parent_id = history[int(value)]
1195
parents.append(parent_id)
1182
1197
version_id, options, pos, size = rec[:4]
1183
version_id = decode_utf8(version_id)
1198
version_id = version_id
1185
1200
# See self._cache_version
1186
1201
# only want the _history index to reference the 1st
1265
1279
result_list.append(str(cache[version][5]))
1266
1280
# -- end lookup () --
1268
result_list.append('.' + encode_utf8(version))
1282
result_list.append('.' + version)
1269
1283
return ' '.join(result_list)
1271
1285
def add_version(self, version_id, options, pos, size, parents):
1279
1293
(version_id, options, pos, size, parents).
1282
encode_utf8 = cache_utf8.encode
1283
1296
orig_history = self._history[:]
1284
1297
orig_cache = self._cache.copy()
1287
1300
for version_id, options, pos, size, parents in versions:
1288
line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),
1301
line = "\n%s %s %s %s %s :" % (version_id,
1289
1302
','.join(options),
1399
1412
sio = StringIO()
1400
1413
data_file = GzipFile(None, mode='wb', fileobj=sio)
1402
version_id_utf8 = cache_utf8.encode(version_id)
1415
assert isinstance(version_id, str)
1403
1416
data_file.writelines(chain(
1404
["version %s %d %s\n" % (version_id_utf8,
1417
["version %s %d %s\n" % (version_id,
1408
["end %s\n" % version_id_utf8]))
1421
["end %s\n" % version_id]))
1409
1422
data_file.close()
1410
1423
length= sio.tell()
1461
1474
if len(rec) != 4:
1462
1475
raise KnitCorrupt(self._filename,
1463
1476
'unexpected number of elements in record header')
1464
if cache_utf8.decode(rec[1]) != version_id:
1477
if rec[1] != version_id:
1465
1478
raise KnitCorrupt(self._filename,
1466
1479
'unexpected version, wanted %r, got %r'
1467
1480
% (version_id, rec[1]))