144
144
def __init__(self):
145
145
self._should_strip_eol = False
148
"""Return a list of (origin, text) tuples."""
149
return list(self.annotate_iter())
151
147
def apply_delta(self, delta, new_version_id):
152
148
"""Apply delta to this object to become new_version_id."""
153
149
raise NotImplementedError(self.apply_delta)
206
202
KnitContent.__init__(self)
207
203
self._lines = lines
209
def annotate_iter(self):
210
"""Yield tuples of (origin, text) for each content line."""
211
# Hands back an iterator over self._lines without transforming the items;
# assumes the stored lines are already (origin, text) pairs -- TODO confirm
# against whatever populates self._lines.
return iter(self._lines)
206
"""Return a list of (origin, text) for each content line."""
207
return list(self._lines)
213
209
def apply_delta(self, delta, new_version_id):
214
210
"""Apply delta to this object to become new_version_id."""
256
252
self._lines = lines
257
253
self._version_id = version_id
259
def annotate_iter(self):
260
"""Yield tuples of (origin, text) for each content line."""
261
# Every line is paired with the same origin, self._version_id, so the
# whole content is attributed to that single version.
for line in self._lines:
262
yield self._version_id, line
256
"""Return a list of (origin, text) for each content line."""
257
return [(self._version_id, line) for line in self._lines]
264
259
def apply_delta(self, delta, new_version_id):
265
260
"""Apply delta to this object to become new_version_id."""
427
422
for origin, text in lines)
430
def annotate_iter(self, knit, version_id):
425
def annotate(self, knit, version_id):
431
426
content = knit._get_content(version_id)
432
return content.annotate_iter()
427
return content.annotate()
435
430
class KnitPlainFactory(_KnitFactory):
489
484
out.extend(lines)
492
def annotate_iter(self, knit, version_id):
487
def annotate(self, knit, version_id):
493
488
annotator = _KnitAnnotator(knit)
494
return iter(annotator.annotate(version_id))
489
return annotator.annotate(version_id)
497
492
def make_empty_knit(transport, relpath):
498
493
"""Construct an empty knit at the specified location."""
499
k = KnitVersionedFile(transport, relpath, 'w', KnitPlainFactory)
494
k = make_file_knit(transport, relpath, 'w', KnitPlainFactory)
497
def make_file_knit(name, transport, file_mode=None, access_mode='w',
498
factory=None, delta=True, create=False, create_parent_dir=False,
499
delay_create=False, dir_mode=None, get_scope=None):
500
"""Factory to create a KnitVersionedFile for a .knit/.kndx file pair."""
502
factory = KnitAnnotateFactory()
504
factory = KnitPlainFactory()
505
if get_scope is None:
506
get_scope = lambda:None
507
index = _KnitIndex(transport, name + INDEX_SUFFIX,
508
access_mode, create=create, file_mode=file_mode,
509
create_parent_dir=create_parent_dir, delay_create=delay_create,
510
dir_mode=dir_mode, get_scope=get_scope)
511
access = _KnitAccess(transport, name + DATA_SUFFIX, file_mode,
512
dir_mode, ((create and not len(index)) and delay_create),
514
return KnitVersionedFile(name, transport, factory=factory,
515
create=create, delay_create=delay_create, index=index,
516
access_method=access)
520
"""Return the suffixes used by file based knits."""
521
return [DATA_SUFFIX, INDEX_SUFFIX]
522
make_file_knit.get_suffixes = get_suffixes
502
525
class KnitVersionedFile(VersionedFile):
514
537
stored and retrieved.
517
def __init__(self, relpath, transport, file_mode=None, access_mode=None,
540
def __init__(self, relpath, transport, file_mode=None,
518
541
factory=None, delta=True, create=False, create_parent_dir=False,
519
542
delay_create=False, dir_mode=None, index=None, access_method=None):
520
543
"""Construct a knit at location specified by relpath.
527
550
actually be created until the first data is stored.
528
551
:param index: An index to use for the knit.
530
if access_mode is None:
532
super(KnitVersionedFile, self).__init__(access_mode)
533
assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
553
super(KnitVersionedFile, self).__init__()
534
554
self.transport = transport
535
555
self.filename = relpath
536
556
self.factory = factory or KnitAnnotateFactory()
537
self.writable = (access_mode == 'w')
538
557
self.delta = delta
540
559
self._max_delta_chain = 200
543
self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
544
access_mode, create=create, file_mode=file_mode,
545
create_parent_dir=create_parent_dir, delay_create=delay_create,
549
if access_method is None:
550
_access = _KnitAccess(transport, relpath + DATA_SUFFIX, file_mode, dir_mode,
551
((create and not len(self)) and delay_create), create_parent_dir)
553
_access = access_method
561
if None in (access_method, index):
562
raise ValueError("No default access_method or index any more")
564
_access = access_method
554
565
if create and not len(self) and not delay_create:
556
567
self._data = _KnitData(_access)
589
600
return fulltext_size > delta_size
602
def _check_write_ok(self):
# Delegates to self._index._check_write_ok() and returns whatever it returns;
# any exception raised by the index check propagates to the caller.
603
return self._index._check_write_ok()
591
605
def _add_raw_records(self, records, data):
592
606
"""Add all the records 'records' with data pre-joined in 'data'.
604
618
for (version_id, options, parents, size), access_memo in zip(
605
619
records, positions):
606
620
index_entries.append((version_id, options, access_memo, parents))
607
if self._data._do_cache:
608
self._data._cache[version_id] = data[offset:offset+size]
610
622
self._index.add_versions(index_entries)
612
def enable_cache(self):
613
"""Start caching data for this knit"""
614
# Pure delegation: the cache itself is owned by self._data.
self._data.enable_cache()
616
def clear_cache(self):
617
"""Clear the data cache only."""
618
# Pure delegation: forwards to self._data.clear_cache().
self._data.clear_cache()
620
624
def copy_to(self, name, transport):
621
625
"""See VersionedFile.copy_to()."""
622
626
# copy the current index to a temp index to avoid racing with local
758
762
"""See VersionedFile.get_graph_with_ghosts()."""
759
763
return self.get_parent_map(self.versions())
761
def get_sha1(self, version_id):
# Single-version convenience wrapper: asks get_sha1s() for a one-element
# list and returns the first (only) item of its result.
762
return self.get_sha1s([version_id])[0]
764
765
def get_sha1s(self, version_ids):
765
"""See VersionedFile.get_sha1()."""
766
"""See VersionedFile.get_sha1s()."""
766
767
record_map = self._get_record_map(version_ids)
767
768
# record entry 2 is the 'digest'.
768
769
return [record_map[v][2] for v in version_ids]
772
"""See VersionedFile.get_suffixes()."""
773
return [DATA_SUFFIX, INDEX_SUFFIX]
775
771
@deprecated_method(one_four)
776
772
def has_ghost(self, version_id):
777
773
"""True if there is a ghost reference in the file to version_id."""
820
816
# Also check the SHA-1 of the fulltext this content will
822
818
raw_data = reader_callable(length)
823
my_fulltext_sha1 = self.get_sha1(version_id)
819
my_fulltext_sha1 = self.get_sha1s([version_id])[0]
824
820
df, rec = self._data._parse_record_header(version_id, raw_data)
825
821
stream_fulltext_sha1 = rec[3]
826
822
if my_fulltext_sha1 != stream_fulltext_sha1:
1090
1086
def check(self, progress_bar=None):
1091
1087
"""See VersionedFile.check()."""
1093
def _clone_text(self, new_version_id, old_version_id, parents):
1094
"""See VersionedFile.clone_text()."""
1095
# FIXME RBC 20060228 make fast by only inserting an index with null
1097
self.add_lines(new_version_id, parents, self.get_lines(old_version_id))
1099
1089
def get_lines(self, version_id):
1100
1090
"""See VersionedFile.get_lines()."""
1101
1091
# Wraps the single version_id in a list for get_line_list() and returns the
# first (only) element of the result.
return self.get_line_list([version_id])[0]
1240
1230
pb.update('Walking content.', total, total)
1242
def iter_parents(self, version_ids):
1243
"""Iterate through the parents for many version ids.
1245
:param version_ids: An iterable yielding version_ids.
1246
:return: An iterator that yields (version_id, parents). Requested
1247
version_ids not present in the versioned file are simply skipped.
1248
The order is undefined, allowing for different optimisations in
1249
the underlying implementation.
1251
return self._index.iter_parents(version_ids)
1253
1232
def num_versions(self):
1254
1233
"""See VersionedFile.num_versions()."""
1255
1234
# The count is supplied by the index object, not computed here.
return self._index.num_versions()
1257
1236
__len__ = num_versions
1259
def annotate_iter(self, version_id):
1260
"""See VersionedFile.annotate_iter."""
1261
# Annotation is delegated to self.factory, which receives this knit and the
# requested version_id.
return self.factory.annotate_iter(self, version_id)
1238
def annotate(self, version_id):
1239
"""See VersionedFile.annotate."""
1240
# Annotation is delegated to self.factory, which receives this knit and the
# requested version_id; its result is returned unchanged.
return self.factory.annotate(self, version_id)
1263
1242
def get_parent_map(self, version_ids):
1264
1243
"""See VersionedFile.get_parent_map."""
1382
def _check_write_ok(self):
# Validates two conditions and returns None when both hold:
#   * raises errors.OutSideTransaction if the value returned by
#     self._get_scope() differs from the stored self._scope;
#   * raises errors.ReadOnlyObjectDirtiedError(self) if self._mode is not 'w'.
# Presumably intended to be called before any mutating operation -- verify
# against callers.
1383
if self._get_scope() != self._scope:
1384
raise errors.OutSideTransaction()
1385
if self._mode != 'w':
1386
raise errors.ReadOnlyObjectDirtiedError(self)
1403
1388
def __init__(self, transport, filename, mode, create=False, file_mode=None,
1404
create_parent_dir=False, delay_create=False, dir_mode=None):
1389
create_parent_dir=False, delay_create=False, dir_mode=None,
1405
1391
_KnitComponentFile.__init__(self, transport, filename, mode,
1406
1392
file_mode=file_mode,
1407
1393
create_parent_dir=create_parent_dir,
1429
1415
self._transport.put_bytes_non_atomic(
1430
1416
self._filename, self.HEADER, mode=self._file_mode)
1417
self._scope = get_scope()
1418
self._get_scope = get_scope
1432
1420
def get_ancestry(self, versions, topo_sorted=True):
1433
1421
"""See VersionedFile.get_ancestry."""
1505
1493
parents, (method, noeol))
1508
def iter_parents(self, version_ids):
1509
"""Iterate through the parents for many version ids.
1511
:param version_ids: An iterable yielding version_ids.
1512
:return: An iterator that yields (version_id, parents). Requested
1513
version_ids not present in the versioned file are simply skipped.
1514
The order is undefined, allowing for different optimisations in
1515
the underlying implementation.
1517
parent_map = self.get_parent_map(version_ids)
1518
parent_map_set = set(parent_map)
1519
unknown_existence = set()
1520
for parents in parent_map.itervalues():
1521
unknown_existence.update(parents)
1522
unknown_existence.difference_update(parent_map_set)
1523
present_parents = set(self.get_parent_map(unknown_existence))
1524
present_parents.update(parent_map_set)
1525
for version_id, parents in parent_map.iteritems():
1526
parents = tuple(parent for parent in parents
1527
if parent in present_parents)
1528
yield version_id, parents
1530
1496
def num_versions(self):
1531
1497
# The version count is the length of self._history.
return len(self._history)
1847
1816
return 'fulltext'
1849
def iter_parents(self, version_ids):
1850
"""Iterate through the parents for many version ids.
1852
:param version_ids: An iterable yielding version_ids.
1853
:return: An iterator that yields (version_id, parents). Requested
1854
version_ids not present in the versioned file are simply skipped.
1855
The order is undefined, allowing for different optimisations in
1856
the underlying implementation.
1859
all_nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
1861
present_parents = set()
1862
for node in all_nodes:
1863
all_parents.update(node[3][0])
1864
# any node we are querying must be present
1865
present_parents.add(node[1])
1866
unknown_parents = all_parents.difference(present_parents)
1867
present_parents.update(self._present_keys(unknown_parents))
1868
for node in all_nodes:
1870
for parent in node[3][0]:
1871
if parent in present_parents:
1872
parents.append(parent[0])
1873
yield node[1][0], tuple(parents)
1875
for node in self._get_entries(self._version_ids_to_keys(version_ids)):
1876
yield node[1][0], ()
1878
1818
def num_versions(self):
1879
1819
# Counts versions by materialising every entry yielded by
# self._graph_index.iter_all_entries() into a list and taking its length,
# so each call walks all entries.
return len(list(self._graph_index.iter_all_entries()))
2421
2361
"""Get all the versions in the stream."""
2422
2362
return self._by_version.keys()
2424
def iter_parents(self, version_ids):
2425
"""Iterate through the parents for many version ids.
2427
:param version_ids: An iterable yielding version_ids.
2428
:return: An iterator that yields (version_id, parents). Requested
2429
version_ids not present in the versioned file are simply skipped.
2430
The order is undefined, allowing for different optimisations in
2431
the underlying implementation.
2434
for version in version_ids:
2436
result.append((version, self._by_version[version][2]))
2442
2365
class _KnitData(object):
2443
2366
"""Manage extraction of data from a KnitAccess, caching and decompressing.
2456
2379
self._access = access
2457
2380
self._checked = False
2458
# TODO: jam 20060713 conceptually, this could spill to disk
2459
# if the cached size gets larger than a certain amount
2460
# but it complicates the model a bit, so for now just use
2461
# a simple dictionary
2463
self._do_cache = False
2465
def enable_cache(self):
2466
"""Enable caching of reads."""
2467
# Only flips the flag; nothing is read or stored by this call itself.
self._do_cache = True
2469
def clear_cache(self):
2470
"""Clear the record cache."""
2471
self._do_cache = False
2474
2382
def _open_file(self):
2475
2383
# Delegates to the access object supplied at construction and returns
# whatever its open_file() returns.
return self._access.open_file()
2575
2483
# uses readv so nice and fast we hope.
2576
2484
if len(records):
2577
2485
# grab the disk data needed.
2579
# Don't check _cache if it is empty
2580
needed_offsets = [index_memo for version_id, index_memo
2582
if version_id not in self._cache]
2584
needed_offsets = [index_memo for version_id, index_memo
2486
needed_offsets = [index_memo for version_id, index_memo
2587
2488
raw_records = self._access.get_raw_records(needed_offsets)
2589
2490
for version_id, index_memo in records:
2590
if version_id in self._cache:
2591
# This data has already been validated
2592
data = self._cache[version_id]
2594
data = raw_records.next()
2596
self._cache[version_id] = data
2598
# validate the header
2599
df, rec = self._parse_record_header(version_id, data)
2491
data = raw_records.next()
2492
# validate the header
2493
df, rec = self._parse_record_header(version_id, data)
2601
2495
yield version_id, data
2603
2497
def read_records_iter(self, records):
2613
2507
if not records:
2617
# Skip records we have already seen
2618
yielded_records = set()
2619
needed_records = set()
2620
for record in records:
2621
if record[0] in self._cache:
2622
if record[0] in yielded_records:
2624
yielded_records.add(record[0])
2625
data = self._cache[record[0]]
2626
content, digest = self._parse_record(record[0], data)
2627
yield (record[0], content, digest)
2629
needed_records.add(record)
2630
needed_records = sorted(needed_records, key=operator.itemgetter(1))
2632
needed_records = sorted(set(records), key=operator.itemgetter(1))
2510
needed_records = sorted(set(records), key=operator.itemgetter(1))
2634
2511
if not needed_records:
2642
2519
for (version_id, index_memo), data in \
2643
2520
izip(iter(needed_records), raw_data):
2644
2521
content, digest = self._parse_record(version_id, data)
2646
self._cache[version_id] = data
2647
2522
yield version_id, content, digest
2649
2524
def read_records(self, records):
2658
2533
class InterKnit(InterVersionedFile):
2659
2534
"""Optimised code paths for knit to knit operations."""
2661
_matching_file_from_factory = KnitVersionedFile
2662
_matching_file_to_factory = KnitVersionedFile
2536
_matching_file_from_factory = staticmethod(make_file_knit)
2537
_matching_file_to_factory = staticmethod(make_file_knit)
2665
2540
def is_compatible(source, target):