import bzrlib.errors as errors
from bzrlib.errors import FileExists, NoSuchFile, KnitError, \
        InvalidRevisionId, KnitCorrupt, KnitHeaderError, \
        RevisionNotPresent, RevisionAlreadyPresent
from bzrlib.tuned_gzip import GzipFile
from bzrlib.trace import mutter
from bzrlib.osutils import contains_whitespace, contains_linebreaks, \
     sha_strings
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed
from bzrlib.tsort import topo_sort
import bzrlib.weave
    def __init__(self, relpath, transport, file_mode=None, access_mode=None,
                 factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
                 create=False):
        """Construct a knit at location specified by relpath.

        :param create: If not True, only open an existing knit.
        """
        if deprecated_passed(basis_knit):
            warnings.warn("KnitVersionedFile.__init__(): The basis_knit parameter is"
                          " deprecated as of bzr 0.9.",
                          DeprecationWarning, stacklevel=2)
        if access_mode is None:
            access_mode = 'w'
        super(KnitVersionedFile, self).__init__(access_mode)
        assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
        self.transport = transport
        self.filename = relpath
        self.factory = factory or KnitAnnotateFactory()
        self.writable = (access_mode == 'w')
        self.delta = delta
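    # Illustrative sketch (not part of the original source): opening or
    # creating a knit over a transport.  The URL, file name and revision id
    # below are hypothetical; get_transport, KnitAnnotateFactory and
    # add_lines come from the surrounding bzrlib API.
    #
    #   from bzrlib.transport import get_transport
    #   t = get_transport('/tmp/example-repo')
    #   k = KnitVersionedFile('example.knit', t, access_mode='w',
    #                         factory=KnitAnnotateFactory(), create=True)
    #   k.add_lines('rev-1', [], ['first line\n', 'second line\n'])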
            diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))
        return diff_hunks
    def _get_components_positions(self, version_ids):
        """Produce a map of position data for the components of versions.

        This data is intended to be used for retrieving the knit records.

        A dict of version_id to (method, data_pos, data_size, next) is
        returned.
        method is the way referenced data should be applied.
        data_pos is the position of the data in the knit.
        data_size is the size of the data in the knit.
        next is the build-parent of the version, or None for fulltexts.
        """
        component_data = {}
        for version_id in version_ids:
            cursor = version_id
            while cursor is not None and cursor not in component_data:
                method = self._index.get_method(cursor)
                if method == 'fulltext':
                    next = None
                else:
                    next = self.get_parents(cursor)[0]
                data_pos, data_size = self._index.get_position(cursor)
                component_data[cursor] = (method, data_pos, data_size, next)
                cursor = next
        return component_data
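    # Illustrative sketch (not part of the original source): shape of the map
    # returned above for a hypothetical knit where 'rev-2' is stored as a
    # line-delta whose build-parent is the fulltext 'rev-1'.
    #
    #   self._get_components_positions(['rev-2'])
    #   # => {'rev-2': ('line-delta', <data_pos>, <data_size>, 'rev-1'),
    #   #     'rev-1': ('fulltext',  <other_pos>, <other_size>, None)}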
    def _get_content(self, version_id, parent_texts={}):
        """Returns a content object that makes up the specified
        version."""
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)

        cached_version = parent_texts.get(version_id, None)
        if cached_version is not None:
            return cached_version

        text_map, contents_map = self._get_content_maps([version_id])
        return contents_map[version_id]
    def _check_versions_present(self, version_ids):
        """Check that all specified versions are present."""

    def get_lines(self, version_id):
        """See VersionedFile.get_lines()."""
        return self.get_line_list([version_id])[0]
    def _get_record_map(self, version_ids):
        """Produce a dictionary of knit records.

        The keys are version_ids, the values are tuples of (method, content,
        digest, next).
        method is the way the content should be applied.
        content is a KnitContent object.
        digest is the SHA1 digest of this version id after all steps are done
        next is the build-parent of the version, i.e. the leftmost ancestor.
        If the method is fulltext, next will be None.
        """
        position_map = self._get_components_positions(version_ids)
        # c = component_id, m = method, p = position, s = size, n = next
        records = [(c, p, s) for c, (m, p, s, n) in position_map.iteritems()]
        record_map = {}
        for component_id, content, digest in \
                self._data.read_records_iter(records):
            method, position, size, next = position_map[component_id]
            record_map[component_id] = method, content, digest, next

        return record_map
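    # Illustrative sketch (not part of the original source): for the same
    # hypothetical two-version knit as above, _get_record_map resolves the
    # positions into record contents while keeping the build-parent chain.
    #
    #   self._get_record_map(['rev-2'])
    #   # => {'rev-2': ('line-delta', <record lines>, <sha1>, 'rev-1'),
    #   #     'rev-1': ('fulltext',   <record lines>, <sha1>, None)}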
    def get_text(self, version_id):
        """See VersionedFile.get_text"""

    def get_line_list(self, version_ids):
        """Return the texts of listed versions as a list of strings."""
        text_map, content_map = self._get_content_maps(version_ids)
        return [text_map[v] for v in version_ids]
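    # Illustrative sketch (not part of the original source): get_line_list
    # batches several versions through a single _get_content_maps call.  The
    # knit instance and version ids here are hypothetical.
    #
    #   k.get_line_list(['rev-1', 'rev-2'])
    #   # => [['first line\n'], ['first line\n', 'second line\n']]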
    def _get_content_maps(self, version_ids):
        """Produce maps of text and KnitContents

        :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        Both dicts take version_ids as their keys.
        """
        for version_id in version_ids:
            if not self.has_version(version_id):
                raise RevisionNotPresent(version_id, self.filename)
        record_map = self._get_record_map(version_ids)

        text_map = {}
        content_map = {}
        final_content = {}
        for version_id in version_ids:
            components = []
            cursor = version_id
            while cursor is not None:
                method, data, digest, next = record_map[cursor]
                components.append((cursor, method, data, digest))
                if cursor in content_map:
                    break
                cursor = next

            content = None
            for component_id, method, data, digest in reversed(components):
                if component_id in content_map:
                    content = content_map[component_id]
                else:
                    version_idx = self._index.lookup(component_id)
                    if method == 'fulltext':
                        assert content is None
                        content = self.factory.parse_fulltext(data, version_idx)
                    elif method == 'line-delta':
                        delta = self.factory.parse_line_delta(data[:],
                                                              version_idx)
                        content = content.copy()
                        content._lines = self._apply_delta(content._lines,
                                                           delta)
                    content_map[component_id] = content

            if 'no-eol' in self._index.get_options(version_id):
                content = content.copy()
                line = content._lines[-1][1].rstrip('\n')
                content._lines[-1] = (content._lines[-1][0], line)
            final_content[version_id] = content

            # digest here is the digest from the last applied component.
            text = content.text()
            if sha_strings(text) != digest:
                raise KnitCorrupt(self.filename,
                                  'sha-1 does not match %s' % version_id)

            text_map[version_id] = text
        return text_map, final_content
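    # Illustrative sketch (not part of the original source): each requested
    # version is rebuilt by walking record_map back to a fulltext and then
    # replaying line-deltas oldest-first; already-built components are reused
    # through content_map.  The version ids below are hypothetical.
    #
    #   text_map, content_map = self._get_content_maps(['rev-1', 'rev-2'])
    #   text_map['rev-2']     # -> list of line strings for 'rev-2'
    #   content_map['rev-2']  # -> KnitContent of (origin, line) pairs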
    def iter_lines_added_or_present_in_versions(self, version_ids=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        It will actively recompress currently cached records on the
        basis that that is cheaper than I/O activity.
        """
        # setup an iterator of the external records:
        # uses readv so nice and fast we hope.
        # grab the disk data needed.
        raw_records = self._transport.readv(self._filename,
            [(pos, size) for version_id, pos, size in records])

        for version_id, pos, size in records:
            pos, data = raw_records.next()
            # validate the header
            df, rec = self._parse_record_header(version_id, data)
            yield version_id, data
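    # Illustrative note (not part of the original source): Transport.readv is
    # assumed here to yield (offset, bytes) tuples in the order the (pos, size)
    # ranges were requested, which is why the loop above can simply pull the
    # next element of raw_records for each record.  The offsets are made up.
    #
    #   raw_records = self._transport.readv(self._filename, [(0, 120), (120, 80)])
    #   pos, data = raw_records.next()   # data for the first requested range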
    def read_records_iter(self, records):
        """Read text records from data file and yield result.

        Each passed record is a tuple of (version_id, pos, len) and
        will be read in the given order. Yields (version_id,
        contents, digest).
        """
        if len(records) == 0:
            return

        # profiling notes:
        # 60890 calls for 4168 extractions in 5045, 683 internal.
        # 4168 calls to readv in 1411
        # 4168 calls to parse_record in 2880

        # Get unique records, sorted by position
        needed_records = sorted(set(records), key=operator.itemgetter(1))

        # We take it that the transport optimizes the fetching as good
        # as possible (ie, reads continuous ranges.)
        response = self._transport.readv(self._filename,
            [(pos, size) for version_id, pos, size in needed_records])

        record_map = {}
        for (record_id, pos, size), (pos, data) in \
                izip(iter(needed_records), response):
            content, digest = self._parse_record(record_id, data)
            record_map[record_id] = (digest, content)

        for version_id, pos, size in records:
            digest, content = record_map[version_id]
            yield version_id, content, digest
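    # Illustrative sketch (not part of the original source): duplicate entries
    # in `records` are read from disk only once (via the sorted set above),
    # but every entry still gets a yielded result, in the caller's order.
    # The version ids and offsets are hypothetical.
    #
    #   records = [('rev-1', 0, 120), ('rev-2', 120, 80), ('rev-1', 0, 120)]
    #   [v for v, c, d in self.read_records_iter(records)]
    #   # => ['rev-1', 'rev-2', 'rev-1']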
    def read_records(self, records):
        """Read records into a dictionary."""