101
101
RevisionNotPresent,
102
102
RevisionAlreadyPresent,
104
from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip
104
from bzrlib.graph import Graph
105
105
from bzrlib.osutils import (
106
106
contains_whitespace,
107
107
contains_linebreaks,
111
from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed
111
from bzrlib.symbol_versioning import (
112
DEPRECATED_PARAMETER,
112
117
from bzrlib.tsort import topo_sort
118
from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip
120
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
114
121
import bzrlib.weave
115
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
118
124
# TODO: Split out code specific to this format into an associated object.
138
144
def __init__(self):
139
145
self._should_strip_eol = False
142
"""Return a list of (origin, text) tuples."""
143
return list(self.annotate_iter())
145
147
def apply_delta(self, delta, new_version_id):
146
148
"""Apply delta to this object to become new_version_id."""
147
149
raise NotImplementedError(self.apply_delta)
200
202
KnitContent.__init__(self)
201
203
self._lines = lines
203
def annotate_iter(self):
204
"""Yield tuples of (origin, text) for each content line."""
205
return iter(self._lines)
206
"""Return a list of (origin, text) for each content line."""
207
return list(self._lines)
207
209
def apply_delta(self, delta, new_version_id):
208
210
"""Apply delta to this object to become new_version_id."""
250
252
self._lines = lines
251
253
self._version_id = version_id
253
def annotate_iter(self):
254
"""Yield tuples of (origin, text) for each content line."""
255
for line in self._lines:
256
yield self._version_id, line
256
"""Return a list of (origin, text) for each content line."""
257
return [(self._version_id, line) for line in self._lines]
258
259
def apply_delta(self, delta, new_version_id):
259
260
"""Apply delta to this object to become new_version_id."""
421
422
for origin, text in lines)
424
def annotate_iter(self, knit, version_id):
425
def annotate(self, knit, version_id):
425
426
content = knit._get_content(version_id)
426
return content.annotate_iter()
427
return content.annotate()
429
430
class KnitPlainFactory(_KnitFactory):
483
484
out.extend(lines)
486
def annotate_iter(self, knit, version_id):
487
def annotate(self, knit, version_id):
487
488
annotator = _KnitAnnotator(knit)
488
return iter(annotator.annotate(version_id))
489
return annotator.annotate(version_id)
491
492
def make_empty_knit(transport, relpath):
492
493
"""Construct an empty knit at the specified location."""
493
k = KnitVersionedFile(transport, relpath, 'w', KnitPlainFactory)
494
k = make_file_knit(transport, relpath, 'w', KnitPlainFactory)
497
def make_file_knit(name, transport, file_mode=None, access_mode='w',
498
factory=None, delta=True, create=False, create_parent_dir=False,
499
delay_create=False, dir_mode=None, get_scope=None):
500
"""Factory to create a KnitVersionedFile for a .knit/.kndx file pair."""
502
factory = KnitAnnotateFactory()
504
factory = KnitPlainFactory()
505
if get_scope is None:
506
get_scope = lambda:None
507
index = _KnitIndex(transport, name + INDEX_SUFFIX,
508
access_mode, create=create, file_mode=file_mode,
509
create_parent_dir=create_parent_dir, delay_create=delay_create,
510
dir_mode=dir_mode, get_scope=get_scope)
511
access = _KnitAccess(transport, name + DATA_SUFFIX, file_mode,
512
dir_mode, ((create and not len(index)) and delay_create),
514
return KnitVersionedFile(name, transport, factory=factory,
515
create=create, delay_create=delay_create, index=index,
516
access_method=access)
520
"""Return the suffixes used by file based knits."""
521
return [DATA_SUFFIX, INDEX_SUFFIX]
522
make_file_knit.get_suffixes = get_suffixes
496
525
class KnitVersionedFile(VersionedFile):
508
537
stored and retrieved.
511
def __init__(self, relpath, transport, file_mode=None, access_mode=None,
540
def __init__(self, relpath, transport, file_mode=None,
512
541
factory=None, delta=True, create=False, create_parent_dir=False,
513
542
delay_create=False, dir_mode=None, index=None, access_method=None):
514
543
"""Construct a knit at location specified by relpath.
521
550
actually be created until the first data is stored.
522
551
:param index: An index to use for the knit.
524
if access_mode is None:
526
super(KnitVersionedFile, self).__init__(access_mode)
527
assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
553
super(KnitVersionedFile, self).__init__()
528
554
self.transport = transport
529
555
self.filename = relpath
530
556
self.factory = factory or KnitAnnotateFactory()
531
self.writable = (access_mode == 'w')
532
557
self.delta = delta
534
559
self._max_delta_chain = 200
537
self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
538
access_mode, create=create, file_mode=file_mode,
539
create_parent_dir=create_parent_dir, delay_create=delay_create,
543
if access_method is None:
544
_access = _KnitAccess(transport, relpath + DATA_SUFFIX, file_mode, dir_mode,
545
((create and not len(self)) and delay_create), create_parent_dir)
547
_access = access_method
561
if None in (access_method, index):
562
raise ValueError("No default access_method or index any more")
564
_access = access_method
548
565
if create and not len(self) and not delay_create:
550
567
self._data = _KnitData(_access)
583
600
return fulltext_size > delta_size
602
def _check_write_ok(self):
603
return self._index._check_write_ok()
585
605
def _add_raw_records(self, records, data):
586
606
"""Add all the records 'records' with data pre-joined in 'data'.
598
618
for (version_id, options, parents, size), access_memo in zip(
599
619
records, positions):
600
620
index_entries.append((version_id, options, access_memo, parents))
601
if self._data._do_cache:
602
self._data._cache[version_id] = data[offset:offset+size]
604
622
self._index.add_versions(index_entries)
606
def enable_cache(self):
607
"""Start caching data for this knit"""
608
self._data.enable_cache()
610
def clear_cache(self):
611
"""Clear the data cache only."""
612
self._data.clear_cache()
614
624
def copy_to(self, name, transport):
615
625
"""See VersionedFile.copy_to()."""
616
626
# copy the current index to a temp index to avoid racing with local
626
636
# move the copied index into place
627
637
transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)
629
def create_empty(self, name, transport, mode=None):
630
return KnitVersionedFile(name, transport, factory=self.factory,
631
delta=self.delta, create=True)
633
639
def get_data_stream(self, required_versions):
634
640
"""Get a data stream for the specified versions.
751
757
annotated_part = "plain"
752
758
return "knit-%s" % (annotated_part,)
760
@deprecated_method(one_four)
754
761
def get_graph_with_ghosts(self):
755
762
"""See VersionedFile.get_graph_with_ghosts()."""
756
graph_items = self._index.get_graph()
757
return dict(graph_items)
759
def get_sha1(self, version_id):
760
return self.get_sha1s([version_id])[0]
763
return self.get_parent_map(self.versions())
762
765
def get_sha1s(self, version_ids):
763
"""See VersionedFile.get_sha1()."""
766
"""See VersionedFile.get_sha1s()."""
764
767
record_map = self._get_record_map(version_ids)
765
768
# record entry 2 is the 'digest'.
766
769
return [record_map[v][2] for v in version_ids]
770
"""See VersionedFile.get_suffixes()."""
771
return [DATA_SUFFIX, INDEX_SUFFIX]
771
@deprecated_method(one_four)
773
772
def has_ghost(self, version_id):
774
773
"""True if there is a ghost reference in the file to version_id."""
775
774
# maybe we have it
776
775
if self.has_version(version_id):
778
777
# optimisable if needed by memoising the _ghosts set.
779
items = self._index.get_graph()
780
for node, parents in items:
778
items = self.get_parent_map(self.versions())
779
for parents in items.itervalues():
781
780
for parent in parents:
782
if parent not in self._index._cache:
783
if parent == version_id:
781
if parent == version_id and parent not in items:
787
785
def insert_data_stream(self, (format, data_list, reader_callable)):
818
816
# Also check the SHA-1 of the fulltext this content will
820
818
raw_data = reader_callable(length)
821
my_fulltext_sha1 = self.get_sha1(version_id)
819
my_fulltext_sha1 = self.get_sha1s([version_id])[0]
822
820
df, rec = self._data._parse_record_header(version_id, raw_data)
823
821
stream_fulltext_sha1 = rec[3]
824
822
if my_fulltext_sha1 != stream_fulltext_sha1:
1088
1086
def check(self, progress_bar=None):
1089
1087
"""See VersionedFile.check()."""
1091
def _clone_text(self, new_version_id, old_version_id, parents):
1092
"""See VersionedFile.clone_text()."""
1093
# FIXME RBC 20060228 make fast by only inserting an index with null
1095
self.add_lines(new_version_id, parents, self.get_lines(old_version_id))
1097
1089
def get_lines(self, version_id):
1098
1090
"""See VersionedFile.get_lines()."""
1099
1091
return self.get_line_list([version_id])[0]
1238
1230
pb.update('Walking content.', total, total)
1240
def iter_parents(self, version_ids):
1241
"""Iterate through the parents for many version ids.
1243
:param version_ids: An iterable yielding version_ids.
1244
:return: An iterator that yields (version_id, parents). Requested
1245
version_ids not present in the versioned file are simply skipped.
1246
The order is undefined, allowing for different optimisations in
1247
the underlying implementation.
1249
return self._index.iter_parents(version_ids)
1251
1232
def num_versions(self):
1252
1233
"""See VersionedFile.num_versions()."""
1253
1234
return self._index.num_versions()
1255
1236
__len__ = num_versions
1257
def annotate_iter(self, version_id):
1258
"""See VersionedFile.annotate_iter."""
1259
return self.factory.annotate_iter(self, version_id)
1238
def annotate(self, version_id):
1239
"""See VersionedFile.annotate."""
1240
return self.factory.annotate(self, version_id)
1261
1242
def get_parent_map(self, version_ids):
1262
1243
"""See VersionedFile.get_parent_map."""
1382
def _check_write_ok(self):
1383
if self._get_scope() != self._scope:
1384
raise errors.OutSideTransaction()
1385
if self._mode != 'w':
1386
raise errors.ReadOnlyObjectDirtiedError(self)
1401
1388
def __init__(self, transport, filename, mode, create=False, file_mode=None,
1402
create_parent_dir=False, delay_create=False, dir_mode=None):
1389
create_parent_dir=False, delay_create=False, dir_mode=None,
1403
1391
_KnitComponentFile.__init__(self, transport, filename, mode,
1404
1392
file_mode=file_mode,
1405
1393
create_parent_dir=create_parent_dir,
1427
1415
self._transport.put_bytes_non_atomic(
1428
1416
self._filename, self.HEADER, mode=self._file_mode)
1430
def get_graph(self):
1431
"""Return a list of the node:parents lists from this knit index."""
1432
return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]
1417
self._scope = get_scope()
1418
self._get_scope = get_scope
1434
1420
def get_ancestry(self, versions, topo_sorted=True):
1435
1421
"""See VersionedFile.get_ancestry."""
1507
1493
parents, (method, noeol))
1510
def iter_parents(self, version_ids):
1511
"""Iterate through the parents for many version ids.
1513
:param version_ids: An iterable yielding version_ids.
1514
:return: An iterator that yields (version_id, parents). Requested
1515
version_ids not present in the versioned file are simply skipped.
1516
The order is undefined, allowing for different optimisations in
1517
the underlying implementation.
1519
parent_map = self.get_parent_map(version_ids)
1520
parent_map_set = set(parent_map)
1521
unknown_existence = set()
1522
for parents in parent_map.itervalues():
1523
unknown_existence.update(parents)
1524
unknown_existence.difference_update(parent_map_set)
1525
present_parents = set(self.get_parent_map(unknown_existence))
1526
present_parents.update(parent_map_set)
1527
for version_id, parents in parent_map.iteritems():
1528
parents = tuple(parent for parent in parents
1529
if parent in present_parents)
1530
yield version_id, parents
1532
1496
def num_versions(self):
1533
1497
return len(self._history)
1849
1816
return 'fulltext'
1851
def get_graph(self):
1852
"""Return a list of the node:parents lists from this knit index."""
1853
if not self._parents:
1854
return [(key, ()) for key in self.get_versions()]
1856
for index, key, value, refs in self._graph_index.iter_all_entries():
1857
result.append((key[0], tuple([ref[0] for ref in refs[0]])))
1860
def iter_parents(self, version_ids):
1861
"""Iterate through the parents for many version ids.
1863
:param version_ids: An iterable yielding version_ids.
1864
:return: An iterator that yields (version_id, parents). Requested
1865
version_ids not present in the versioned file are simply skipped.
1866
The order is undefined, allowing for different optimisations in
1867
the underlying implementation.
1870
all_nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
1872
present_parents = set()
1873
for node in all_nodes:
1874
all_parents.update(node[3][0])
1875
# any node we are querying must be present
1876
present_parents.add(node[1])
1877
unknown_parents = all_parents.difference(present_parents)
1878
present_parents.update(self._present_keys(unknown_parents))
1879
for node in all_nodes:
1881
for parent in node[3][0]:
1882
if parent in present_parents:
1883
parents.append(parent[0])
1884
yield node[1][0], tuple(parents)
1886
for node in self._get_entries(self._version_ids_to_keys(version_ids)):
1887
yield node[1][0], ()
1889
1818
def num_versions(self):
1890
1819
return len(list(self._graph_index.iter_all_entries()))
2236
2165
def get_raw_records(self, memos_for_retrieval):
2237
2166
"""Get the raw bytes for the given records.
2239
:param memos_for_retrieval: An iterable containing the (thunk_flag,
2240
index, start, end) memo for retrieving the bytes.
2241
:return: An iterator over the bytes of the records.
2168
:param memos_for_retrieval: An iterable of memos from the
2169
_StreamIndex object identifying bytes to read; for these classes
2170
they are (from_backing_knit, index, start, end) and can point to
2171
either the backing knit or streamed data.
2172
:return: An iterator yielding a byte string for each record in
2173
memos_for_retrieval.
2243
2175
# use a generator for memory friendliness
2244
for thunk_flag, version_id, start, end in memos_for_retrieval:
2245
if version_id is self.stream_index:
2176
for from_backing_knit, version_id, start, end in memos_for_retrieval:
2177
if not from_backing_knit:
2178
assert version_id is self.stream_index
2246
2179
yield self.data[start:end]
2248
2181
# we have been asked to thunk. This thunking only occurs when
2253
2186
# as desired. However, for now, this is sufficient.
2254
2187
if self.orig_factory.__class__ != KnitPlainFactory:
2255
2188
raise errors.KnitCorrupt(
2256
self, 'Bad thunk request %r' % version_id)
2189
self, 'Bad thunk request %r cannot be backed by %r' %
2190
(version_id, self.orig_factory))
2257
2191
lines = self.backing_knit.get_lines(version_id)
2258
2192
line_bytes = ''.join(lines)
2259
2193
digest = sha_string(line_bytes)
2194
# the packed form of the fulltext always has a trailing newline,
2195
# even if the actual text does not, unless the file is empty. the
2196
# record options including the noeol flag are passed through by
2197
# _StreamIndex, so this is safe.
2261
2199
if lines[-1][-1] != '\n':
2262
2200
lines[-1] = lines[-1] + '\n'
2263
2201
line_bytes += '\n'
2264
orig_options = list(self.backing_knit._index.get_options(version_id))
2265
if 'fulltext' not in orig_options:
2266
if 'line-delta' not in orig_options:
2267
raise errors.KnitCorrupt(self,
2268
'Unknown compression method %r' % orig_options)
2269
orig_options.remove('line-delta')
2270
orig_options.append('fulltext')
2271
2202
# We want plain data, because we expect to thunk only to allow text
2273
2204
size, bytes = self.backing_knit._data._record_to_data(version_id,
2325
2256
:return: A dict of version_id:(index_memo, compression_parent,
2326
2257
parents, record_details).
2328
opaque structure to pass to read_records to extract the raw
2259
opaque memo that can be passed to _StreamAccess.read_records
2260
to extract the raw data; for these classes it is
2261
(from_backing_knit, index, start, end)
2330
2262
compression_parent
2331
2263
Content that this record is built upon, may be None
2345
2277
parent_ids = self.get_parents_with_ghosts(version_id)
2346
2278
noeol = ('no-eol' in self.get_options(version_id))
2279
index_memo = self.get_position(version_id)
2280
from_backing_knit = index_memo[0]
2281
if from_backing_knit:
2282
# texts retrieved from the backing knit are always full texts
2347
2284
if method == 'fulltext':
2348
2285
compression_parent = None
2350
2287
compression_parent = parent_ids[0]
2351
index_memo = self.get_position(version_id)
2352
2288
result[version_id] = (index_memo, compression_parent,
2353
2289
parent_ids, (method, noeol))
2356
2292
def get_method(self, version_id):
2357
2293
"""Return compression method of specified version."""
2359
options = self._by_version[version_id][0]
2361
# Strictly speaking this should check in the backing knit, but
2362
# until we have a test to discriminate, this will do.
2363
return self.backing_index.get_method(version_id)
2294
options = self.get_options(version_id)
2364
2295
if 'fulltext' in options:
2365
2296
return 'fulltext'
2366
2297
elif 'line-delta' in options:
2377
2308
return self._by_version[version_id][0]
2378
2309
except KeyError:
2379
return self.backing_index.get_options(version_id)
2310
options = list(self.backing_index.get_options(version_id))
2311
if 'fulltext' in options:
2313
elif 'line-delta' in options:
2314
# Texts from the backing knit are always returned from the stream
2316
options.remove('line-delta')
2317
options.append('fulltext')
2319
raise errors.KnitIndexUnknownMethod(self, options)
2320
return tuple(options)
2381
2322
def get_parent_map(self, version_ids):
2382
2323
"""Passed through to by KnitVersionedFile.get_parent_map."""
2404
2345
coordinates into that (as index_memo's are opaque outside the
2405
2346
index and matching access class).
2407
:return: a tuple (thunk_flag, index, start, end). If thunk_flag is
2408
False, index will be self, otherwise it will be a version id.
2348
:return: a tuple (from_backing_knit, index, start, end) that can
2349
be passed e.g. to get_raw_records.
2350
If from_backing_knit is False, index will be self, otherwise it
2351
will be a version id.
2411
2354
start, end = self._by_version[version_id][1]
2418
2361
"""Get all the versions in the stream."""
2419
2362
return self._by_version.keys()
2421
def iter_parents(self, version_ids):
2422
"""Iterate through the parents for many version ids.
2424
:param version_ids: An iterable yielding version_ids.
2425
:return: An iterator that yields (version_id, parents). Requested
2426
version_ids not present in the versioned file are simply skipped.
2427
The order is undefined, allowing for different optimisations in
2428
the underlying implementation.
2431
for version in version_ids:
2433
result.append((version, self._by_version[version][2]))
2439
2365
class _KnitData(object):
2440
2366
"""Manage extraction of data from a KnitAccess, caching and decompressing.
2453
2379
self._access = access
2454
2380
self._checked = False
2455
# TODO: jam 20060713 conceptually, this could spill to disk
2456
# if the cached size gets larger than a certain amount
2457
# but it complicates the model a bit, so for now just use
2458
# a simple dictionary
2460
self._do_cache = False
2462
def enable_cache(self):
2463
"""Enable caching of reads."""
2464
self._do_cache = True
2466
def clear_cache(self):
2467
"""Clear the record cache."""
2468
self._do_cache = False
2471
2382
def _open_file(self):
2472
2383
return self._access.open_file()
2572
2483
# uses readv so nice and fast we hope.
2573
2484
if len(records):
2574
2485
# grab the disk data needed.
2576
# Don't check _cache if it is empty
2577
needed_offsets = [index_memo for version_id, index_memo
2579
if version_id not in self._cache]
2581
needed_offsets = [index_memo for version_id, index_memo
2486
needed_offsets = [index_memo for version_id, index_memo
2584
2488
raw_records = self._access.get_raw_records(needed_offsets)
2586
2490
for version_id, index_memo in records:
2587
if version_id in self._cache:
2588
# This data has already been validated
2589
data = self._cache[version_id]
2591
data = raw_records.next()
2593
self._cache[version_id] = data
2595
# validate the header
2596
df, rec = self._parse_record_header(version_id, data)
2491
data = raw_records.next()
2492
# validate the header
2493
df, rec = self._parse_record_header(version_id, data)
2598
2495
yield version_id, data
2600
2497
def read_records_iter(self, records):
2610
2507
if not records:
2614
# Skip records we have already seen
2615
yielded_records = set()
2616
needed_records = set()
2617
for record in records:
2618
if record[0] in self._cache:
2619
if record[0] in yielded_records:
2621
yielded_records.add(record[0])
2622
data = self._cache[record[0]]
2623
content, digest = self._parse_record(record[0], data)
2624
yield (record[0], content, digest)
2626
needed_records.add(record)
2627
needed_records = sorted(needed_records, key=operator.itemgetter(1))
2629
needed_records = sorted(set(records), key=operator.itemgetter(1))
2510
needed_records = sorted(set(records), key=operator.itemgetter(1))
2631
2511
if not needed_records:
2639
2519
for (version_id, index_memo), data in \
2640
2520
izip(iter(needed_records), raw_data):
2641
2521
content, digest = self._parse_record(version_id, data)
2643
self._cache[version_id] = data
2644
2522
yield version_id, content, digest
2646
2524
def read_records(self, records):
2655
2533
class InterKnit(InterVersionedFile):
2656
2534
"""Optimised code paths for knit to knit operations."""
2658
_matching_file_from_factory = KnitVersionedFile
2659
_matching_file_to_factory = KnitVersionedFile
2536
_matching_file_from_factory = staticmethod(make_file_knit)
2537
_matching_file_to_factory = staticmethod(make_file_knit)
2662
2540
def is_compatible(source, target):
2673
2551
see join() for the parameter definitions.
2675
2553
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
2676
graph = self.source.get_graph(version_ids)
2677
order = topo_sort(graph.items())
2554
# --- the below is factorable out with VersionedFile.join, but wait for
2555
# VersionedFiles, it may all be simpler then.
2556
graph = Graph(self.source)
2557
search = graph._make_breadth_first_searcher(version_ids)
2558
transitive_ids = set()
2559
map(transitive_ids.update, list(search))
2560
parent_map = self.source.get_parent_map(transitive_ids)
2561
order = topo_sort(parent_map.items())
2679
2563
def size_of_content(content):
2680
2564
return sum(len(line) for line in content.text())