76
118
search.stop_searching_any(exclude_keys.intersection(next_revs))
77
119
search_result = search.get_result()
78
if search_result.get_recipe()[2] != revision_count:
120
if (not discard_excess and
121
search_result.get_recipe()[3] != revision_count):
79
122
# we got back a different amount of data than expected, this
80
123
# gets reported as NoSuchRevision, because less revisions
81
124
# indicates missing revisions, and more should never happen as
82
125
# the excludes list considers ghosts and ensures that ghost
83
126
# filling races are not a problem.
84
127
return (None, FailedSmartServerResponse(('NoSuchRevision',)))
128
return (search_result, None)
133
class SmartServerRepositoryReadLocked(SmartServerRepositoryRequest):
    """Calls self.do_readlocked_repository_request."""

    def do_repository_request(self, repository, *args):
        """Read lock a repository for do_readlocked_repository_request.

        :param repository: The repository to lock and delegate on.
        :param args: Passed through to do_readlocked_repository_request.
        :return: Whatever do_readlocked_repository_request returns.
        """
        repository.lock_read()
        try:
            return self.do_readlocked_repository_request(repository, *args)
        finally:
            # Release the read lock even if the delegated request raises.
            repository.unlock()
class SmartServerRepositoryGetParentMap(SmartServerRepositoryRequest):
91
146
"""Bzr 1.2+ - get parent data for revisions during a graph search."""
148
no_extra_results = False
93
150
def do_repository_request(self, repository, *revision_ids):
94
151
"""Get parent details for some revisions.
96
153
All the parents for revision_ids are returned. Additionally up to 64KB
97
154
of additional parent data found by performing a breadth first search
98
155
from revision_ids is returned. The verb takes a body containing the
99
156
current search state, see do_body for details.
158
If 'include-missing:' is in revision_ids, ghosts encountered in the
159
graph traversal for getting parent data are included in the result with
160
a prefix of 'missing:'.
101
162
:param repository: The repository to query in.
102
163
:param revision_ids: The utf8 encoded revision_id to answer for.
298
409
return SuccessfulSmartServerResponse(('ok', token))
412
class SmartServerRepositoryGetStream(SmartServerRepositoryRequest):
414
def do_repository_request(self, repository, to_network_name):
415
"""Get a stream for inserting into a to_format repository.
417
:param repository: The repository to stream from.
418
:param to_network_name: The network name of the format of the target
421
self._to_format = network_format_registry.get(to_network_name)
422
if self._should_fake_unknown():
423
return FailedSmartServerResponse(
424
('UnknownMethod', 'Repository.get_stream'))
425
return None # Signal that we want a body.
427
def _should_fake_unknown(self):
428
"""Return True if we should return UnknownMethod to the client.
430
This is a workaround for bugs in pre-1.19 clients that claim to
431
support receiving streams of CHK repositories. The pre-1.19 client
432
expects inventory records to be serialized in the format defined by
433
to_network_name, but in pre-1.19 (at least) that format definition
434
tries to use the xml5 serializer, which does not correctly handle
435
rich-roots. After 1.19 the client can also accept inventory-deltas
436
(which avoids this issue), and those clients will use the
437
Repository.get_stream_1.19 verb instead of this one.
438
So: if this repository is CHK, and the to_format doesn't match,
439
we should just fake an UnknownSmartMethod error so that the client
440
will fallback to VFS, rather than sending it a stream we know it
443
from_format = self._repository._format
444
to_format = self._to_format
445
if not from_format.supports_chks:
446
# Source not CHK: that's ok
448
if (to_format.supports_chks and
449
from_format.repository_class is to_format.repository_class and
450
from_format._serializer == to_format._serializer):
451
# Source is CHK, but target matches: that's ok
452
# (e.g. 2a->2a, or CHK2->2a)
454
# Source is CHK, and target is not CHK or incompatible CHK. We can't
455
# generate a compatible stream.
458
def do_body(self, body_bytes):
459
repository = self._repository
460
repository.lock_read()
462
search_result, error = self.recreate_search(repository, body_bytes,
464
if error is not None:
467
source = repository._get_source(self._to_format)
468
stream = source.get_stream(search_result)
470
exc_info = sys.exc_info()
472
# On non-error, unlocking is done by the body stream handler.
475
raise exc_info[0], exc_info[1], exc_info[2]
476
return SuccessfulSmartServerResponse(('ok',),
477
body_stream=self.body_stream(stream, repository))
479
def body_stream(self, stream, repository):
480
byte_stream = _stream_to_byte_stream(stream, repository._format)
482
for bytes in byte_stream:
484
except errors.RevisionNotPresent, e:
485
# This shouldn't be able to happen, but as we don't buffer
486
# everything it can in theory happen.
488
yield FailedSmartServerResponse(('NoSuchRevision', e.revision_id))
493
class SmartServerRepositoryGetStream_1_19(SmartServerRepositoryGetStream):

    def _should_fake_unknown(self):
        """Returns False; we don't need to workaround bugs in 1.19+ clients."""
        return False
def _stream_to_byte_stream(stream, src_format):
    """Convert a record stream to a self delimited byte stream.

    :param stream: An iterable of (substream_type, substream) pairs.
    :param src_format: The repository format; its network name is emitted as
        the first record of the container.
    :return: An iterator of pack container bytes.
    """
    pack_writer = pack.ContainerSerialiser()
    yield pack_writer.begin()
    yield pack_writer.bytes_record(src_format.network_name(), '')
    for substream_type, substream in stream:
        for record in substream:
            if record.storage_kind in ('chunked', 'fulltext'):
                serialised = record_to_fulltext_bytes(record)
            elif record.storage_kind == 'inventory-delta':
                serialised = record_to_inventory_delta_bytes(record)
            elif record.storage_kind == 'absent':
                raise ValueError("Absent factory for %s" % (record.key,))
            else:
                serialised = record.get_bytes_as(record.storage_kind)
            if serialised:
                # Some streams embed the whole stream into the wire
                # representation of the first record, which means that
                # later records have no wire representation: we skip them.
                yield pack_writer.bytes_record(serialised, [(substream_type,)])
    yield pack_writer.end()
class _ByteStreamDecoder(object):
    """Helper for _byte_stream_to_stream.

    The expected usage of this class is via the function _byte_stream_to_stream
    which creates a _ByteStreamDecoder, pops off the stream format and then
    yields the output of record_stream(), the main entry point to
    _ByteStreamDecoder.

    This is complicated by wishing to return type, iterator_for_type, but
    getting the data for iterator_for_type when we find out type: we can't
    simply pass a generator down to the NetworkRecordStream parser, instead
    we have a little local state to seed each NetworkRecordStream instance,
    and gather the type that we'll be yielding.

    :ivar byte_stream: The byte stream being decoded.
    :ivar stream_decoder: A pack parser used to decode the bytestream
    :ivar current_type: The current type, used to join adjacent records of the
        same type into a single stream.
    :ivar first_bytes: The first bytes to give the next NetworkRecordStream.
    """

    def __init__(self, byte_stream, record_counter):
        """Create a _ByteStreamDecoder."""
        self.stream_decoder = pack.ContainerPushParser()
        self.current_type = None
        self.first_bytes = None
        self.byte_stream = byte_stream
        self._record_counter = record_counter
        # Number of revision records seen so far; drives the progress bar.
        self.key_count = 0

    def iter_stream_decoder(self):
        """Iterate the contents of the pack from stream_decoder."""
        # dequeue pending items
        for record in self.stream_decoder.read_pending_records():
            yield record
        # Pull bytes of the wire, decode them to records, yield those records.
        for bytes in self.byte_stream:
            self.stream_decoder.accept_bytes(bytes)
            for record in self.stream_decoder.read_pending_records():
                yield record

    def iter_substream_bytes(self):
        """Yield record bytes for one substream, stopping at a type change."""
        if self.first_bytes is not None:
            yield self.first_bytes
            # If we run out of pack records, single the outer layer to stop.
            self.first_bytes = None
        for record in self.iter_pack_records:
            record_names, record_bytes = record
            record_name, = record_names
            substream_type = record_name[0]
            if substream_type != self.current_type:
                # end of a substream, seed the next substream.
                self.current_type = substream_type
                self.first_bytes = record_bytes
                return
            yield record_bytes

    def record_stream(self):
        """Yield substream_type, substream from the byte stream."""
        def wrap_and_count(pb, rc, substream):
            """Yield records from stream while showing progress."""
            counter = 0
            if rc:
                if self.current_type != 'revisions' and self.key_count != 0:
                    # As we know the number of revisions now (in self.key_count)
                    # we can setup and use record_counter (rc).
                    if not rc.is_initialized():
                        rc.setup(self.key_count, self.key_count)
            for record in substream.read():
                if rc:
                    if rc.is_initialized() and counter == rc.STEP:
                        rc.increment(counter)
                        pb.update('Estimate', rc.current, rc.max)
                        counter = 0
                    if self.current_type == 'revisions':
                        # Total records is proportional to number of revs
                        # to fetch. With remote, we used self.key_count to
                        # track the number of revs. Once we have the revs
                        # counts in self.key_count, the progress bar changes
                        # from 'Estimating..' to 'Estimate' above.
                        self.key_count += 1
                        if counter == rc.STEP:
                            pb.update('Estimating..', self.key_count)
                            counter = 0
                counter += 1
                yield record

        self.seed_state()
        pb = ui.ui_factory.nested_progress_bar()
        rc = self._record_counter
        # Make and consume sub generators, one per substream type:
        while self.first_bytes is not None:
            substream = NetworkRecordStream(self.iter_substream_bytes())
            # after substream is fully consumed, self.current_type is set to
            # the next type, and self.first_bytes is set to the matching bytes.
            yield self.current_type, wrap_and_count(pb, rc, substream)
        if rc:
            pb.update('Done', rc.max, rc.max)
        pb.finished()

    def seed_state(self):
        """Prepare the _ByteStreamDecoder to decode from the pack stream."""
        # Set a single generator we can use to get data from the pack stream.
        self.iter_pack_records = self.iter_stream_decoder()
        # Seed the very first subiterator with content; after this each one
        # seeds the next.
        list(self.iter_substream_bytes())
def _byte_stream_to_stream(byte_stream, record_counter=None):
    """Convert a byte stream into a format and a stream.

    :param byte_stream: A bytes iterator, as output by _stream_to_byte_stream.
    :param record_counter: Optional progress counter passed through to the
        decoder (see _ByteStreamDecoder).
    :return: (RepositoryFormat, stream_generator)
    """
    decoder = _ByteStreamDecoder(byte_stream, record_counter)
    # The first record of the container carries the source format's network
    # name; consume bytes until it is available, then hand the rest of the
    # stream to the decoder.
    for bytes in byte_stream:
        decoder.stream_decoder.accept_bytes(bytes)
        for record in decoder.stream_decoder.read_pending_records(max=1):
            record_names, src_format_name = record
            src_format = network_format_registry.get(src_format_name)
            return src_format, decoder.record_stream()
301
651
class SmartServerRepositoryUnlock(SmartServerRepositoryRequest):
303
653
def do_repository_request(self, repository, token):
363
723
dirname = dirname.encode(sys.getfilesystemencoding())
364
724
# python's tarball module includes the whole path by default so
366
assert dirname.endswith('.bzr')
726
if not dirname.endswith('.bzr'):
727
raise ValueError(dirname)
367
728
tarball.add(dirname, '.bzr') # recursive by default
372
class SmartServerRepositoryStreamKnitDataForRevisions(SmartServerRepositoryRequest):
373
"""Bzr <= 1.1 streaming pull, buffers all data on server."""
375
def do_repository_request(self, repository, *revision_ids):
376
repository.lock_read()
378
return self._do_repository_request(repository, revision_ids)
382
def _do_repository_request(self, repository, revision_ids):
383
stream = repository.get_data_stream_for_search(
384
repository.revision_ids_to_search_result(set(revision_ids)))
386
pack = ContainerSerialiser()
387
buffer.write(pack.begin())
389
for name_tuple, bytes in stream:
390
buffer.write(pack.bytes_record(bytes, [name_tuple]))
391
except errors.RevisionNotPresent, e:
392
return FailedSmartServerResponse(('NoSuchRevision', e.revision_id))
393
buffer.write(pack.end())
394
return SuccessfulSmartServerResponse(('ok',), buffer.getvalue())
397
class SmartServerRepositoryStreamRevisionsChunked(SmartServerRepositoryRequest):
398
"""Bzr 1.1+ streaming pull."""
400
def do_body(self, body_bytes):
401
repository = self._repository
402
repository.lock_read()
404
search, error = self.recreate_search(repository, body_bytes)
405
if error is not None:
407
stream = repository.get_data_stream_for_search(search.get_result())
409
# On non-error, unlocking is done by the body stream handler.
412
return SuccessfulSmartServerResponse(('ok',),
413
body_stream=self.body_stream(stream, repository))
415
def body_stream(self, stream, repository):
416
pack = ContainerSerialiser()
419
for name_tuple, bytes in stream:
420
yield pack.bytes_record(bytes, [name_tuple])
421
except errors.RevisionNotPresent, e:
422
# This shouldn't be able to happen, but as we don't buffer
423
# everything it can in theory happen.
424
yield FailedSmartServerResponse(('NoSuchRevision', e.revision_id))
733
class SmartServerRepositoryInsertStreamLocked(SmartServerRepositoryRequest):
734
"""Insert a record stream from a RemoteSink into a repository.
736
This gets bytes pushed to it by the network infrastructure and turns that
737
into a bytes iterator using a thread. That is then processed by
738
_byte_stream_to_stream.
743
def do_repository_request(self, repository, resume_tokens, lock_token):
744
"""StreamSink.insert_stream for a remote repository."""
745
repository.lock_write(token=lock_token)
746
self.do_insert_stream_request(repository, resume_tokens)
748
def do_insert_stream_request(self, repository, resume_tokens):
749
tokens = [token for token in resume_tokens.split(' ') if token]
751
self.repository = repository
752
self.queue = Queue.Queue()
753
self.insert_thread = threading.Thread(target=self._inserter_thread)
754
self.insert_thread.start()
756
def do_chunk(self, body_stream_chunk):
757
self.queue.put(body_stream_chunk)
759
def _inserter_thread(self):
761
src_format, stream = _byte_stream_to_stream(
762
self.blocking_byte_stream())
763
self.insert_result = self.repository._get_sink().insert_stream(
764
stream, src_format, self.tokens)
765
self.insert_ok = True
767
self.insert_exception = sys.exc_info()
768
self.insert_ok = False
770
def blocking_byte_stream(self):
772
bytes = self.queue.get()
773
if bytes is StopIteration:
779
self.queue.put(StopIteration)
780
if self.insert_thread is not None:
781
self.insert_thread.join()
782
if not self.insert_ok:
783
exc_info = self.insert_exception
784
raise exc_info[0], exc_info[1], exc_info[2]
785
write_group_tokens, missing_keys = self.insert_result
786
if write_group_tokens or missing_keys:
787
# bzip needed? missing keys should typically be a small set.
788
# Should this be a streaming body response ?
789
missing_keys = sorted(missing_keys)
790
bytes = bencode.bencode((write_group_tokens, missing_keys))
791
self.repository.unlock()
792
return SuccessfulSmartServerResponse(('missing-basis', bytes))
794
self.repository.unlock()
795
return SuccessfulSmartServerResponse(('ok', ))
798
class SmartServerRepositoryInsertStream_1_19(SmartServerRepositoryInsertStreamLocked):
    """Insert a record stream from a RemoteSink into a repository.

    Same as SmartServerRepositoryInsertStreamLocked, except:
     - the lock token argument is optional
     - servers that implement this verb accept 'inventory-delta' records in the
       stream.
    """

    def do_repository_request(self, repository, resume_tokens, lock_token=None):
        """StreamSink.insert_stream for a remote repository."""
        SmartServerRepositoryInsertStreamLocked.do_repository_request(
            self, repository, resume_tokens, lock_token)
815
class SmartServerRepositoryInsertStream(SmartServerRepositoryInsertStreamLocked):
    """Insert a record stream from a RemoteSink into an unlocked repository.

    This is the same as SmartServerRepositoryInsertStreamLocked, except it
    takes no lock_tokens; i.e. it works with an unlocked (or lock-free, e.g.
    like pack format) repository.
    """

    def do_repository_request(self, repository, resume_tokens):
        """StreamSink.insert_stream for a remote repository."""
        repository.lock_write()
        self.do_insert_stream_request(repository, resume_tokens)