import bzrlib.errors as errors
from bzrlib.errors import FileExists, NoSuchFile, KnitError, \
        InvalidRevisionId, KnitCorrupt, KnitHeaderError, \
        RevisionNotPresent, RevisionAlreadyPresent
from bzrlib.tuned_gzip import GzipFile
from bzrlib.trace import mutter
from bzrlib.osutils import contains_whitespace, contains_linebreaks, \
        sha_strings
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed
from bzrlib.tsort import topo_sort
import bzrlib.weave
    def __init__(self, relpath, transport, file_mode=None, access_mode=None,
                 factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
                 create=False):
        """Construct a knit at location specified by relpath.

        :param create: If not True, only open an existing knit.
        """
        if deprecated_passed(basis_knit):
            warnings.warn("KnitVersionedFile.__init__(): The basis_knit"
                          " parameter is deprecated as of bzr 0.9.",
                          DeprecationWarning, stacklevel=2)
        if access_mode is None:
            access_mode = 'w'
        super(KnitVersionedFile, self).__init__(access_mode)
        assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
        self.transport = transport
        self.filename = relpath
        # basis_knit is deprecated; only keep a reference when the caller
        # explicitly passed one, otherwise fall back to None.
        if deprecated_passed(basis_knit):
            self.basis_knit = basis_knit
        else:
            self.basis_knit = None
        self.factory = factory or KnitAnnotateFactory()
        self.writable = (access_mode == 'w')
        self.delta = delta
            diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))
        return diff_hunks
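    # Illustrative sketch (values hypothetical, not taken from the code above):
    # each hunk produced above has the shape (start, end, count, lines), where
    # [start, end) is the replaced range in the old content, count is
    # len(lines), and lines are the (origin, text) pairs taken from
    # new_content.  Replacing old lines 3-5 with two new lines would give:
    #
    #     (3, 5, 2, [('rev-2', 'new line a\n'), ('rev-2', 'new line b\n')])
    #
    # Applying such hunks in order only needs a running offset, e.g.:
    #
    #     offset = 0
    #     for start, end, count, lines in diff_hunks:
    #         old_lines[offset + start:offset + end] = lines
    #         offset += count - (end - start)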
    def _get_component_versions(self, version_id):
        # Walk back along the build-parents of version_id until a fulltext
        # is reached, preferring components held by the basis knit.
        basis = self.basis_knit
        needed_versions = []
        basis_versions = []
        cursor = version_id

        while 1:
            picked_knit = self
            if basis and basis._index.has_version(cursor):
                picked_knit = basis
                basis_versions.append(cursor)
            method = picked_knit._index.get_method(cursor)
            needed_versions.append((method, cursor))
            if method == 'fulltext':
                break
            cursor = picked_knit.get_parents(cursor)[0]
        return needed_versions, basis_versions
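    # Illustrative return value (version ids hypothetical): walking back from
    # 'rev-3' through two line-deltas to a fulltext, with no basis knit, gives
    #
    #     needed_versions = [('line-delta', 'rev-3'),
    #                        ('line-delta', 'rev-2'),
    #                        ('fulltext', 'rev-1')]
    #     basis_versions = []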
    def _get_component_positions(self, version_id):
        needed_versions, basis_versions = \
            self._get_component_versions(version_id)
        assert len(basis_versions) == 0
        positions = []
        for method, comp_id in needed_versions:
            data_pos, data_size = self._index.get_position(comp_id)
            positions.append((method, comp_id, data_pos, data_size))
        return positions
    def _get_components(self, version_id):
        """Return a list of (version_id, method, data) tuples that
        makes up version specified by version_id of the knit.

        The components should be applied in the order of the returned
        list.

        The basis knit will be used to the largest extent possible
        since it is assumed that accesses to it are faster.
        """
        # 4168 calls in 14912, 2289 internal
        # 4168 in 9711 to read_records
        # 52554 in 1250 to get_parents
        # 170166 in 865 to list.append

        # needed_versions holds a list of (method, version_id) of
        # versions that need to be fetched to construct the final
        # version of the file.
        #
        # basis_versions is a list of versions that need to be
        # fetched but exist in the basis knit.
        needed_versions, basis_versions = \
            self._get_component_versions(version_id)

        components = {}
        if basis_versions:
            assert False, "I am broken"
            basis = self.basis_knit
            records = []
            for comp_id in basis_versions:
                data_pos, data_size = basis._index.get_data_position(comp_id)
                records.append((comp_id, data_pos, data_size))
            components.update(basis._data.read_records(records))

        records = []
        for comp_id in [vid for method, vid in needed_versions
                        if vid not in basis_versions]:
            data_pos, data_size = self._index.get_position(comp_id)
            records.append((comp_id, data_pos, data_size))
        components.update(self._data.read_records(records))

        # read_records returns a mapping with the version id as
        # index and the value as data.  The order the components need
        # to be applied is held by needed_versions (reversed).
        out = []
        for method, comp_id in reversed(needed_versions):
            out.append((comp_id, method, components[comp_id]))
        return out

    def _get_components_positions(self, version_ids):
        """Produce a map of position data for the components of versions.

        This data is intended to be used for retrieving the knit records.

        A dict of version_id to (method, data_pos, data_size, next) is
        returned.
        method is the way referenced data should be applied.
        data_pos is the position of the data in the knit.
        data_size is the size of the data in the knit.
        next is the build-parent of the version, or None for fulltexts.
        """
        component_data = {}
        for version_id in version_ids:
            cursor = version_id
            while cursor is not None and cursor not in component_data:
                method = self._index.get_method(cursor)
                if method == 'fulltext':
                    next = None
                else:
                    next = self.get_parents(cursor)[0]
                data_pos, data_size = self._index.get_position(cursor)
                component_data[cursor] = (method, data_pos, data_size, next)
                cursor = next
        return component_data
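    # Illustrative shape of the map built above (ids and offsets hypothetical):
    #
    #     {'rev-1': ('fulltext',   0,   312, None),
    #      'rev-2': ('line-delta', 312, 95,  'rev-1')}
    #
    # i.e. 'rev-2' is stored as a 95-byte delta at offset 312 whose
    # build-parent is 'rev-1', itself a fulltext at offset 0.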
    def _get_content(self, version_id, parent_texts={}):
        """Returns a content object that makes up the specified
        version."""
        cached_version = parent_texts.get(version_id, None)
        if cached_version is not None:
            return cached_version

        if self.basis_knit and version_id in self.basis_knit:
            return self.basis_knit._get_content(version_id)

        text_map, contents_map = self._get_content_maps([version_id])
        return contents_map[version_id]
    def _check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
    def get_lines(self, version_id):
        """See VersionedFile.get_lines()."""
        return self.get_line_list([version_id])[0]
    def _get_version_components(self, position_map):
        records = []
        for version_id, positions in position_map.iteritems():
            for method, comp_id, position, size in positions:
                records.append((comp_id, position, size))
        record_map = self._data.read_records(records)

        component_map = {}
        for version_id, positions in position_map.iteritems():
            components = []
            for method, comp_id, position, size in positions:
                data, digest = record_map[comp_id]
                components.append((comp_id, method, data, digest))
            component_map[version_id] = components
        return component_map
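    # Illustrative shape of the returned component_map (ids hypothetical):
    #
    #     {'rev-2': [('rev-2', 'line-delta', <data>, <sha1>),
    #                ('rev-1', 'fulltext',   <data>, <sha1>)]}
    #
    # i.e. every requested version maps to the list of components needed to
    # rebuild its text, newest first; callers apply them in reverse.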
    def _get_record_map(self, version_ids):
        """Produce a dictionary of knit records.

        The keys are version_ids, the values are tuples of (method, content,
        digest, next).
        method is the way the content should be applied.
        content is a KnitContent object.
        digest is the SHA1 digest of this version id after all steps are done.
        next is the build-parent of the version, i.e. the leftmost ancestor.
        If the method is fulltext, next will be None.
        """
        position_map = self._get_components_positions(version_ids)
        # c = component_id, m = method, p = position, s = size, n = next
        records = [(c, p, s) for c, (m, p, s, n) in position_map.iteritems()]
        record_map = {}
        for component_id, content, digest in \
                self._data.read_records_iter(records):
            method, position, size, next = position_map[component_id]
            record_map[component_id] = method, content, digest, next

        return record_map
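    # Illustrative record map (ids hypothetical): a version stored as a delta
    # on top of a fulltext would come back as
    #
    #     {'rev-2': ('line-delta', <content of rev-2>, <sha1>, 'rev-1'),
    #      'rev-1': ('fulltext',   <content of rev-1>, <sha1>, None)}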
    def get_text(self, version_id):
        """See VersionedFile.get_text"""
    def get_line_list(self, version_ids):
        """Return the texts of listed versions as a list of strings."""
        text_map, content_map = self._get_content_maps(version_ids)
        return [text_map[v] for v in version_ids]
    def _get_content_maps(self, version_ids):
        """Produce maps of text and KnitContents.

        :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        Both dicts take version_ids as their keys.
        """
        for version_id in version_ids:
            if not self.has_version(version_id):
                raise RevisionNotPresent(version_id, self.filename)
        record_map = self._get_record_map(version_ids)

        text_map = {}
        content_map = {}
        final_content = {}
        for version_id in version_ids:
            components = []
            cursor = version_id
            while cursor is not None:
                method, data, digest, next = record_map[cursor]
                components.append((cursor, method, data, digest))
                if cursor in content_map:
                    break
                cursor = next

            content = None
            for component_id, method, data, digest in reversed(components):
                if component_id in content_map:
                    content = content_map[component_id]
                else:
                    version_idx = self._index.lookup(component_id)
                    if method == 'fulltext':
                        assert content is None
                        content = self.factory.parse_fulltext(data, version_idx)
                    elif method == 'line-delta':
                        delta = self.factory.parse_line_delta(data[:],
                                                              version_idx)
                        content = content.copy()
                        content._lines = self._apply_delta(content._lines,
                                                           delta)
                    content_map[component_id] = content

            if 'no-eol' in self._index.get_options(version_id):
                content = content.copy()
                line = content._lines[-1][1].rstrip('\n')
                content._lines[-1] = (content._lines[-1][0], line)
            final_content[version_id] = content

            # digest here is the digest from the last applied component.
            text = content.text()
            if sha_strings(text) != digest:
                raise KnitCorrupt(self.filename,
                                  'sha-1 does not match %s' % version_id)

            text_map[version_id] = text
        return text_map, final_content
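    # Illustrative result (ids and texts hypothetical):
    #
    #     text_map    == {'rev-1': ['hello\n', 'world\n']}
    #     content_map == {'rev-1': <KnitContent holding (origin, line) pairs>}
    #
    # get_line_list() pulls its strings from the first map; _get_content()
    # returns entries from the second.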
    def iter_lines_added_or_present_in_versions(self, version_ids=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        It will actively recompress currently cached records on the
        basis that this is cheaper than I/O activity.
        """
        # setup an iterator of the external records:
        # uses readv so nice and fast we hope.
        if len(records):
            # grab the disk data needed.
            raw_records = self._transport.readv(self._filename,
                [(pos, size) for version_id, pos, size in records])

        for version_id, pos, size in records:
            pos, data = raw_records.next()
            # validate the header
            df, rec = self._parse_record_header(version_id, data)
            yield version_id, data
    def read_records_iter(self, records):
        """Read text records from data file and yield result.

        The records will be read in the given order.  Yields (version_id,
        contents, digest).
        """
        if len(records) == 0:
            return
        # profiling notes:
        # 60890 calls for 4168 extractions in 5045, 683 internal.
        # 4168 calls to readv in 1411
        # 4168 calls to parse_record in 2880

        # Get unique records, sorted by position
        needed_records = sorted(set(records), key=operator.itemgetter(1))

        # We take it that the transport optimizes the fetching as well
        # as possible (ie, reads continuous ranges).
        response = self._transport.readv(self._filename,
            [(pos, size) for version_id, pos, size in needed_records])

        record_map = {}
        for (record_id, pos, size), (pos, data) in \
                izip(iter(needed_records), response):
            content, digest = self._parse_record(record_id, data)
            record_map[record_id] = (digest, content)

        for version_id, pos, size in records:
            digest, content = record_map[version_id]
            yield version_id, content, digest
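    # Illustrative use (ids, offsets and sizes hypothetical; `knit_data`
    # stands in for an instance of this class):
    #
    #     for version_id, contents, digest in knit_data.read_records_iter(
    #             [('rev-1', 0, 312), ('rev-2', 312, 95)]):
    #         ...
    #
    # yields ('rev-1', <contents>, <sha1>) and then ('rev-2', <contents>,
    # <sha1>), in the order the records were passed in.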
    def read_records(self, records):
        """Read records into a dictionary."""