        # double handling for now. Make it work until then.
        bytes = ''.join(lines)
        record = FulltextContentFactory(key, parents, None, bytes)
        sha1 = list(self._insert_record_stream([record], random_id=random_id))[0]
        return sha1, len(bytes), None
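    # Everything added as lines is funnelled through the record-stream path:
    # the lines are joined into one byte string, wrapped in a
    # FulltextContentFactory, and handed to _insert_record_stream(), which
    # yields the sha1 back. Hence the "double handling" note above.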

    def annotate(self, key):
        """See VersionedFiles.annotate."""
        graph = Graph(self)
        parent_map = self.get_parent_map([key])
        if not parent_map:
            raise errors.RevisionNotPresent(key, self)
        if parent_map[key] is not None:
            search = graph._make_breadth_first_searcher([key])
            keys = set()
            while True:
                try:
                    present, ghosts = search.next_with_ghosts()
                except StopIteration:
                    break
                keys.update(present)
            parent_map = self.get_parent_map(keys)
        else:
            keys = [key]
            parent_map = {key:()}
        head_cache = _mod_graph.FrozenHeadsCache(graph)
        parent_cache = {}
        reannotate = annotate.reannotate
        for record in self.get_record_stream(keys, 'topological', True):
            key = record.key
            fulltext = split_lines(record.get_bytes_as('fulltext'))
            parent_lines = [parent_cache[parent] for parent in parent_map[key]]
            parent_cache[key] = list(
                reannotate(parent_lines, fulltext, key, None, head_cache))
        return parent_cache[key]
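    # annotate() collects the full ancestry of the key with a breadth-first
    # search (tolerating ghosts), then replays the texts in topological order
    # so that each parent's annotated lines are already in parent_cache by the
    # time reannotate() processes the child.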

    def _check_add(self, key, lines, random_id, check_content):
        """Check that a version key and lines are safe to add."""
        if check_content:
            self._check_lines_not_unicode(lines)
            self._check_lines_are_lines(lines)

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        result = {}
        sources = [self._index]
        source_results = []
        missing = set(keys)
        for source in sources:
            if not missing:
                break
            new_result = source.get_parent_map(missing)
            source_results.append(new_result)
            result.update(new_result)
            missing.difference_update(set(new_result))
        return result
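    # Example with hypothetical keys: if ('rev-2',) is stored with parent
    # ('rev-1',) and ('ghost',) is not stored at all, then
    # get_parent_map([('rev-2',), ('ghost',)]) returns
    # {('rev-2',): (('rev-1',),)} -- the absent key is simply omitted.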

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """Get a stream of records for keys.

        :param keys: The keys to include.
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the opaque data).
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        # keys might be a generator
        keys = set(keys)
        if not keys:
            return
        if not self._index.has_graph:
            # Cannot topological order when no graph has been stored.
            ordering = 'unordered'
        locations = self._index.get_build_details(keys)
        if ordering == 'topological':
            # would be better to not globally sort initially but instead
            # start with one key, recurse to its oldest parent, then grab
            # everything in the same group, etc.
            parent_map = dict((key, details[2]) for key, details in
                locations.iteritems())
            present_keys = topo_sort(parent_map)
            # Now group by source:
        else:
            present_keys = locations.keys()
        absent_keys = keys.difference(set(locations))
        for key in absent_keys:
            yield AbsentContentFactory(key)
        for key in present_keys:
            index_memo, _, parents, (method, _) = locations[key]
            read_memo = index_memo[0:3]
            zdata = self._access.get_raw_records([read_memo]).next()
            # decompress the whole group up front: wasteful for big groups,
            # but it keeps extraction simple.
            plain = zlib.decompress(zdata)
            delta_lines = split_lines(plain[index_memo[3]:index_memo[4]])
            label, sha1, delta = parse(delta_lines)
            if label != key:
                raise AssertionError("wrong key: %r, wanted %r" % (label, key))
            basis = plain[:index_memo[3]]
            basis = StringIO(basis).readlines()
            #basis = split_lines(plain[:last_end])
            lines = apply_delta(basis, delta)
            bytes = ''.join(lines)
            yield FulltextContentFactory(key, parents, sha1, bytes)
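    # Layout assumed by the extraction loop above: index_memo is
    # (index, start, stop, basis_end, delta_end); one zlib.decompress() of the
    # group yields `plain`, where plain[:basis_end] is the group's shared
    # basis text and plain[basis_end:delta_end] is this key's labelled delta.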

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s()."""
        result = {}
        for record in self.get_record_stream(keys, 'unordered', True):
            if record.sha1 is not None:
                result[record.key] = record.sha1
            else:
                if record.storage_kind != 'absent':
                    result[record.key] = sha_string(record.get_bytes_as(
                        'fulltext'))
        return result

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        """
        # The internal helper yields the sha1 of each inserted record; this
        # public API just drains it.
        for _ in self._insert_record_stream(stream):
            pass

    def _insert_record_stream(self, stream, random_id=False):
        """Internal core to insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: An iterator over the sha1s of the inserted records.
        :seealso insert_record_stream:
        :seealso add_lines:
        """
        adapters = {}
        def get_adapter(adapter_key):
            try:
                return adapters[adapter_key]
            except KeyError:
                adapter_factory = adapter_registry.get(adapter_key)
                adapter = adapter_factory(self)
                adapters[adapter_key] = adapter
                return adapter
        compressor = GroupCompressor(self._delta)
        # This will go up to fulltexts for gc to gc fetching, which isn't
        # ideal but it is how knits work today.
        keys_to_add = []
        basis_end = 0
        for record in stream:
            # Raise an error when a record is missing.
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent([record.key], self)
            elif record.storage_kind == 'fulltext':
                bytes = record.get_bytes_as('fulltext')
            else:
                adapter_key = record.storage_kind, 'fulltext'
                adapter = get_adapter(adapter_key)
                bytes = adapter.get_bytes(record,
                    record.get_bytes_as(record.storage_kind))
            found_sha1, end_point = compressor.compress(record.key,
                split_lines(bytes), record.sha1)
            yield found_sha1
            keys_to_add.append((record.key, '%d %d' % (basis_end, end_point),
                (record.parents,)))
            basis_end = end_point
        compressed = zlib.compress(''.join(compressor.lines))
        index, start, length = self._access.add_raw_records(
            [(None, len(compressed))], compressed)[0]
        nodes = []
        for key, reads, refs in keys_to_add:
            nodes.append((key, "%d %d %s" % (start, length, reads), refs))
        self._index.add_records(nodes, random_id=random_id)
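    # Note that one call compresses the entire stream into a single group:
    # every record's "basis_end end_point" pair indexes into the one
    # zlib-compressed blob written by the single add_raw_records() call above.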

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        if pb is None:
            pb = progress.DummyProgress()
        keys = set(keys)
        total = len(keys)
        # we don't care about inclusions, the caller cares.
        # but we need to setup a list of records to visit.
        # we need key, position, length
        for key_idx, record in enumerate(self.get_record_stream(keys,
            'unordered', True)):
            # XXX: todo - optimise to use less than full texts.
            key = record.key
            pb.update('Walking content.', key_idx, total)
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self)
            lines = split_lines(record.get_bytes_as('fulltext'))
            for line in lines:
                yield line, key
        pb.update('Walking content.', total, total)

    def keys(self):
        """See VersionedFiles.keys."""
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(2, "keys scales with size of history")
        sources = [self._index]
        result = set()
        for source in sources:
            result.update(source.keys())
        return result


class _GCGraphIndex(object):
    """Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""

    def __init__(self, graph_index, is_locked, parents=True,
        add_callback=None, deltas=False):
        """Construct a _GCGraphIndex on a graph_index.

        :param is_locked: A callback to check whether the object should answer
            queries.
        """
        self._add_callback = add_callback
        self._graph_index = graph_index
        self._parents = parents
        if deltas and not parents:
            # XXX: TODO: Delta tree and parent graph should be conceptually
            # separate.
            raise errors.KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")
        self.has_graph = parents
        self._is_locked = is_locked

    def add_records(self, records, random_id=False):
        """Add multiple records to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the _GCGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert, then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param records: a list of tuples:
            (key, value, node_refs).
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.
        keys = {}
        for (key, value, refs) in records:
            if not self._parents:
                if refs:
                    for ref in refs:
                        if ref:
                            raise errors.KnitCorrupt(self,
                                "attempt to add node with parents "
                                "in parentless index.")
                    refs = ()
            keys[key] = (value, refs)
        # check for duplicates already present in the index
        if not random_id:
            present_nodes = self._get_entries(keys)
            for (index, key, value, node_refs) in present_nodes:
                if node_refs != keys[key][1]:
                    raise errors.KnitCorrupt(self, "inconsistent details in add_records"
                        ": %s %s" % ((value, node_refs), keys[key]))
                del keys[key]
        result = []
        if self._parents:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value, node_refs))
        else:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value))
        records = result
        self._add_callback(records)
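    # A prepared node looks like (key, value, refs) with value holding four
    # space-separated integers, e.g. (('rev-1',), '0 4096 0 128', ((),)) --
    # hypothetical numbers; see _node_to_position for how value is decoded.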

    def _check_read(self):
        """Raise if reads are not permitted."""
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)

    def _check_write_ok(self):
        """Raise if writes are not permitted."""
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        :param keys: An iterable of index key tuples.
        """
        keys = set(keys)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(keys):
                yield node
                found_keys.add(node[1])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], node[2], ()
                found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise errors.RevisionNotPresent(missing_keys.pop(), self)

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        self._check_read()
        nodes = self._get_entries(keys)
        result = {}
        if self._parents:
            for node in nodes:
                result[node[1]] = node[3][0]
        else:
            for node in nodes:
                result[node[1]] = None
        return result

    def get_build_details(self, keys):
        """Get the various build details for keys.

        Ghosts are omitted from the result.

        :param keys: An iterable of keys.
        :return: A dict of key:
            (index_memo, compression_parent, parents, record_details).
            index_memo
                opaque structure to pass to read_records to extract the raw
                data
            compression_parent
                Content that this record is built upon, may be None
            parents
                Logical parents of this node
            record_details
                extra information about the content which needs to be passed to
                Factory.parse_record
        """
        self._check_read()
        result = {}
        entries = self._get_entries(keys, False)
        for entry in entries:
            key = entry[1]
            if not self._parents:
                parents = None
            else:
                parents = entry[3][0]
            method = 'group'
            result[key] = (self._node_to_position(entry),
                None, parents, (method, None))
        return result
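    # Each value in the result is ((index, start, stop, basis_end, delta_end),
    # None, parents, (method, None)): the compression-parent slot is always
    # None because texts are rebuilt from their group's shared basis rather
    # than from another text.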

    def keys(self):
        """Get all the keys in the collection.

        The keys are not ordered.
        """
        self._check_read()
        return [node[1] for node in self._graph_index.iter_all_entries()]

    def _node_to_position(self, node):
        """Convert an index value to position details."""
        bits = node[2].split(' ')
        # It would be nice not to read the entire gzip.
        start = int(bits[0])
        stop = int(bits[1])
        basis_end = int(bits[2])
        delta_end = int(bits[3])
        return node[0], start, stop, basis_end, delta_end
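    # The value decoded here is the "start length basis_end delta_end" string
    # stored by add_records: the first pair locates the compressed group in
    # storage, the second pair locates this text's basis and delta regions
    # inside the decompressed group.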