        finally:
            repository.unlock()

    def _expand_requested_revs(self, repo_graph, revision_ids, client_seen_revs,
                               include_missing, max_size=65536):
        result = {}
        queried_revs = set()
        estimator = estimate_compressed_size.ZLibEstimator(max_size)
        next_revs = revision_ids
        first_loop_done = False
        while next_revs:
            queried_revs.update(next_revs)
            parent_map = repo_graph.get_parent_map(next_revs)
            current_revs = next_revs
            next_revs = set()
            for revision_id in current_revs:
                missing_rev = False
                parents = parent_map.get(revision_id)
                if parents is not None:
                    # adjust for the wire
                    if parents == (_mod_revision.NULL_REVISION,):
                        parents = ()
                    # prepare the next query
                    next_revs.update(parents)
                    encoded_id = revision_id
                else:
                    missing_rev = True
                    encoded_id = "missing:" + revision_id
                    parents = []
                if (revision_id not in client_seen_revs and
                        (not missing_rev or include_missing)):
                    # Client does not have this revision, give it to it.
                    # add parents to the result
                    result[encoded_id] = parents
                    # Approximate the serialized cost of this revision_id.
                    line = '%s %s\n' % (encoded_id, ' '.join(parents))
                    estimator.add_content(line)
            # get all the directly asked for parents, and then flesh out to
            # 64K (compressed) or so. We do one level of depth at a time to
            # stay in sync with the client. The 250000 magic number is
            # estimated compression ratio taken from bzr.dev itself.
            if self.no_extra_results or (first_loop_done and estimator.full()):
                trace.mutter('size: %d, z_size: %d'
                             % (estimator._uncompressed_size_added,
                                estimator._compressed_size_added))
                next_revs = set()
            # don't query things we've already queried
            next_revs = next_revs.difference(queried_revs)
            first_loop_done = True
        return result

    def _do_repository_request(self, body_bytes):
        repository = self._repository
        revision_ids = set(self._revision_ids)
        include_missing = 'include-missing:' in revision_ids
        if include_missing:
            revision_ids.remove('include-missing:')
        body_lines = body_bytes.split('\n')
        search_result, error = self.recreate_search_from_recipe(
            repository, body_lines)
        if error is not None:
            return error
        # TODO might be nice to start up the search again; but that's not
        # written or tested yet.
        client_seen_revs = set(search_result.get_keys())
        # Always include the requested ids.
        client_seen_revs.difference_update(revision_ids)

        repo_graph = repository.get_graph()
        result = self._expand_requested_revs(repo_graph, revision_ids,
                                             client_seen_revs, include_missing)

        # sorting trivially puts lexicographically similar revision ids
        # together. Compression FTW.
        lines = []
        for revision, parents in sorted(result.items()):
            lines.append(' '.join((revision, ) + tuple(parents)))
        return SuccessfulSmartServerResponse(
            ('ok', ), bz2.compress('\n'.join(lines)))
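
# A minimal, self-contained sketch of the compressed-size budgeting used by
# _expand_requested_revs above. The real implementation is
# bzrlib.estimate_compressed_size.ZLibEstimator; this hypothetical stand-in
# assumes only the stdlib zlib module and mirrors the interface used above
# (add_content(), full(), and the two size attributes logged via trace.mutter).
import zlib


class _SketchZLibEstimator(object):

    def __init__(self, max_size):
        self._max_size = max_size
        self._compressor = zlib.compressobj()
        self._uncompressed_size_added = 0
        self._compressed_size_added = 0

    def add_content(self, content):
        self._uncompressed_size_added += len(content)
        self._compressed_size_added += len(self._compressor.compress(content))

    def full(self):
        # Sync-flush so bytes still buffered inside zlib are counted before
        # comparing against the budget; compression can continue afterwards.
        self._compressed_size_added += len(
            self._compressor.flush(zlib.Z_SYNC_FLUSH))
        return self._compressed_size_added >= self._max_size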
        return SuccessfulSmartServerResponse(('ok', token))
class SmartServerRepositoryGetStream(SmartServerRepositoryRequest):

    def do_repository_request(self, repository, to_network_name):
        """Get a stream for inserting into a to_format repository.

        The request body is 'search_bytes', a description of the revisions
        being requested.

        In 2.3 this verb added support for search_bytes == 'everything'. Older
        implementations will respond with a BadSearch error, and clients should
        catch this and fall back appropriately.

        :param repository: The repository to stream from.
        :param to_network_name: The network name of the format of the target
            repository.
        """
        self._to_format = network_format_registry.get(to_network_name)
        if self._should_fake_unknown():
            return FailedSmartServerResponse(
                ('UnknownMethod', 'Repository.get_stream'))
        return None # Signal that we want a body.
    def _should_fake_unknown(self):
        """Return True if we should return UnknownMethod to the client.

        This is a workaround for bugs in pre-1.19 clients that claim to
        support receiving streams of CHK repositories. The pre-1.19 client
        expects inventory records to be serialized in the format defined by
        to_network_name, but in pre-1.19 (at least) that format definition
        tries to use the xml5 serializer, which does not correctly handle
        rich-roots. After 1.19 the client can also accept inventory-deltas
        (which avoids this issue), and those clients will use the
        Repository.get_stream_1.19 verb instead of this one.
        So: if this repository is CHK, and the to_format doesn't match,
        we should just fake an UnknownSmartMethod error so that the client
        will fall back to VFS, rather than sending it a stream we know it
        cannot handle.
        """
        from_format = self._repository._format
        to_format = self._to_format
        if not from_format.supports_chks:
            # Source not CHK: that's ok
            return False
        if (to_format.supports_chks and
                from_format.repository_class is to_format.repository_class and
                from_format._serializer == to_format._serializer):
            # Source is CHK, but target matches: that's ok
            # (e.g. 2a->2a, or CHK2->2a)
            return False
        # Source is CHK, and target is not CHK or incompatible CHK. We can't
        # generate a compatible stream.
        return True
    def do_body(self, body_bytes):
        repository = self._repository
        repository.lock_read()
        try:
            search_result, error = self.recreate_search(repository, body_bytes,
                discard_excess=True)
            if error is not None:
                repository.unlock()
                return error
            source = repository._get_source(self._to_format)
            stream = source.get_stream(search_result)
        except Exception:
            exc_info = sys.exc_info()
            try:
                # On non-error, unlocking is done by the body stream handler.
                repository.unlock()
            finally:
                raise exc_info[0], exc_info[1], exc_info[2]
        return SuccessfulSmartServerResponse(('ok',),
            body_stream=self.body_stream(stream, repository))

    def body_stream(self, stream, repository):
        byte_stream = _stream_to_byte_stream(stream, repository._format)
        try:
            for bytes in byte_stream:
                yield bytes
        except errors.RevisionNotPresent, e:
            # This shouldn't be able to happen, but as we don't buffer
            # everything it can in theory happen.
            repository.unlock()
            yield FailedSmartServerResponse(('NoSuchRevision', e.revision_id))
        else:
            repository.unlock()
class SmartServerRepositoryGetStream_1_19(SmartServerRepositoryGetStream):
    """The same as Repository.get_stream, but will return stream CHK formats to
    clients.

    See SmartServerRepositoryGetStream._should_fake_unknown.
    """

    def _should_fake_unknown(self):
        """Returns False; we don't need to work around bugs in 1.19+ clients."""
        return False
def _stream_to_byte_stream(stream, src_format):
    """Convert a record stream to a self-delimited byte stream."""
    pack_writer = pack.ContainerSerialiser()
    yield pack_writer.begin()
    yield pack_writer.bytes_record(src_format.network_name(), '')
    for substream_type, substream in stream:
        for record in substream:
            if record.storage_kind in ('chunked', 'fulltext'):
                serialised = record_to_fulltext_bytes(record)
            elif record.storage_kind == 'absent':
                raise ValueError("Absent factory for %s" % (record.key,))
            else:
                serialised = record.get_bytes_as(record.storage_kind)
            if serialised:
                # Some streams embed the whole stream into the wire
                # representation of the first record, which means that
                # later records have no wire representation: we skip them.
                yield pack_writer.bytes_record(serialised, [(substream_type,)])
    yield pack_writer.end()
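
# A hedged round-trip sketch of the container framing produced above. It only
# relies on ContainerSerialiser and ContainerPushParser behaving as they are
# used elsewhere in this module: bytes_record() takes a payload plus a list of
# name tuples, and read_pending_records() yields (name_tuples, payload) pairs.
# _sketch_container_roundtrip is an illustrative helper, not bzrlib API.
def _sketch_container_roundtrip():
    writer = pack.ContainerSerialiser()
    wire_bytes = [writer.begin()]
    wire_bytes.append(writer.bytes_record('payload one', [('texts',)]))
    wire_bytes.append(writer.bytes_record('payload two', [('revisions',)]))
    wire_bytes.append(writer.end())
    # Feed the framed bytes back through the push parser.
    parser = pack.ContainerPushParser()
    for data in wire_bytes:
        parser.accept_bytes(data)
    # Expected: [([('texts',)], 'payload one'), ([('revisions',)], 'payload two')]
    return list(parser.read_pending_records())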
class _ByteStreamDecoder(object):
    """Helper for _byte_stream_to_stream.

    The expected usage of this class is via the function _byte_stream_to_stream
    which creates a _ByteStreamDecoder, pops off the stream format and then
    yields the output of record_stream(), the main entry point to
    _ByteStreamDecoder.

    Broadly this class has to unwrap two layers of iterators:
    (type, substream)
    (substream details)

    This is complicated by wishing to return type, iterator_for_type, but
    getting the data for iterator_for_type when we find out type: we can't
    simply pass a generator down to the NetworkRecordStream parser, instead
    we have a little local state to seed each NetworkRecordStream instance,
    and gather the type that we'll be yielding.

    :ivar byte_stream: The byte stream being decoded.
    :ivar stream_decoder: A pack parser used to decode the bytestream
    :ivar current_type: The current type, used to join adjacent records of the
        same type into a single stream.
    :ivar first_bytes: The first bytes to give the next NetworkRecordStream.
    """
    def __init__(self, byte_stream, record_counter):
        """Create a _ByteStreamDecoder."""
        self.stream_decoder = pack.ContainerPushParser()
        self.current_type = None
        self.first_bytes = None
        self.byte_stream = byte_stream
        self._record_counter = record_counter
        self.key_count = 0
    def iter_stream_decoder(self):
        """Iterate the contents of the pack from stream_decoder."""
        # dequeue pending items
        for record in self.stream_decoder.read_pending_records():
            yield record
        # Pull bytes off the wire, decode them to records, yield those records.
        for bytes in self.byte_stream:
            self.stream_decoder.accept_bytes(bytes)
            for record in self.stream_decoder.read_pending_records():
                yield record
    def iter_substream_bytes(self):
        if self.first_bytes is not None:
            yield self.first_bytes
            # If we run out of pack records, signal the outer layer to stop.
            self.first_bytes = None
        for record in self.iter_pack_records:
            record_names, record_bytes = record
            record_name, = record_names
            substream_type = record_name[0]
            if substream_type != self.current_type:
                # end of a substream, seed the next substream.
                self.current_type = substream_type
                self.first_bytes = record_bytes
                return
            yield record_bytes
    def record_stream(self):
        """Yield substream_type, substream from the byte stream."""
        def wrap_and_count(pb, rc, substream):
            """Yield records from stream while showing progress."""
            counter = 0
            if rc:
                if self.current_type != 'revisions' and self.key_count != 0:
                    # As we know the number of revisions now (in self.key_count)
                    # we can setup and use record_counter (rc).
                    if not rc.is_initialized():
                        rc.setup(self.key_count, self.key_count)
            for record in substream.read():
                if rc:
                    if rc.is_initialized() and counter == rc.STEP:
                        rc.increment(counter)
                        pb.update('Estimate', rc.current, rc.max)
                        counter = 0
                    if self.current_type == 'revisions':
                        # Total records is proportional to number of revs
                        # to fetch. With remote, we used self.key_count to
                        # track the number of revs. Once we have the revs
                        # counts in self.key_count, the progress bar changes
                        # from 'Estimating..' to 'Estimate' above.
                        self.key_count += 1
                        if counter == rc.STEP:
                            pb.update('Estimating..', self.key_count)
                            counter = 0
                counter += 1
                yield record

        self.seed_state()
        pb = ui.ui_factory.nested_progress_bar()
        rc = self._record_counter
        # Make and consume sub generators, one per substream type:
        while self.first_bytes is not None:
            substream = NetworkRecordStream(self.iter_substream_bytes())
            # after substream is fully consumed, self.current_type is set to
            # the next type, and self.first_bytes is set to the matching bytes.
            yield self.current_type, wrap_and_count(pb, rc, substream)
        if rc:
            pb.update('Done', rc.max, rc.max)
        pb.finished()
    def seed_state(self):
        """Prepare the _ByteStreamDecoder to decode from the pack stream."""
        # Set a single generator we can use to get data from the pack stream.
        self.iter_pack_records = self.iter_stream_decoder()
        # Seed the very first subiterator with content; after this each one
        # seeds the next.
        list(self.iter_substream_bytes())
def _byte_stream_to_stream(byte_stream, record_counter=None):
    """Convert a byte stream into a format and a stream.

    :param byte_stream: A bytes iterator, as output by _stream_to_byte_stream.
    :return: (RepositoryFormat, stream_generator)
    """
    decoder = _ByteStreamDecoder(byte_stream, record_counter)
    for bytes in byte_stream:
        decoder.stream_decoder.accept_bytes(bytes)
        for record in decoder.stream_decoder.read_pending_records(max=1):
            record_names, src_format_name = record
            src_format = network_format_registry.get(src_format_name)
            return src_format, decoder.record_stream()
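
# A self-contained sketch (hypothetical helper, not bzrlib API) of the
# lookahead-seeding trick _ByteStreamDecoder uses: regroup a flat iterator of
# (type, payload) records into consecutive (type, payload_iterator) pairs. One
# record of shared state tells the outer loop where each substream ends; as in
# the real decoder, each substream must be fully consumed before advancing.
def _sketch_group_records(records):
    records = iter(records)
    state = {'type': None, 'first': None}

    def substream():
        # Yield the seeded record, then everything of the same type.
        yield state['first']
        state['first'] = None
        for record_type, payload in records:
            if record_type != state['type']:
                # End of this substream: seed the next one and stop.
                state['type'] = record_type
                state['first'] = payload
                return
            yield payload

    try:
        state['type'], state['first'] = records.next()
    except StopIteration:
        return
    while state['first'] is not None:
        yield state['type'], substream()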
class SmartServerRepositoryUnlock(SmartServerRepositoryRequest):

    def do_repository_request(self, repository, token):
            dirname = dirname.encode(sys.getfilesystemencoding())
            # python's tarball module includes the whole path by default so
            # we pass '.bzr' as the archive name instead.
            if not dirname.endswith('.bzr'):
                raise ValueError(dirname)
            tarball.add(dirname, '.bzr') # recursive by default
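
# A stdlib-only sketch of the tarfile behaviour the code above relies on:
# add() is recursive for directories by default, and passing an explicit
# arcname ('.bzr') replaces the full on-disk path in the archive. The helper
# name and arguments here are illustrative, not part of this module.
def _sketch_tar_dot_bzr(dirname, ofile):
    import tarfile
    tarball = tarfile.open(fileobj=ofile, mode='w|gz')
    try:
        # Members are stored as '.bzr/...' rather than '/full/path/.bzr/...'.
        tarball.add(dirname, '.bzr')
    finally:
        tarball.close()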
class SmartServerRepositoryInsertStreamLocked(SmartServerRepositoryRequest):
    """Insert a record stream from a RemoteSink into a repository.

    This gets bytes pushed to it by the network infrastructure and turns that
    into a bytes iterator using a thread. That is then processed by
    _byte_stream_to_stream.
    """

    def do_repository_request(self, repository, resume_tokens, lock_token):
        """StreamSink.insert_stream for a remote repository."""
        repository.lock_write(token=lock_token)
        self.do_insert_stream_request(repository, resume_tokens)

    def do_insert_stream_request(self, repository, resume_tokens):
        tokens = [token for token in resume_tokens.split(' ') if token]
        self.tokens = tokens
        self.repository = repository
        self.queue = Queue.Queue()
        self.insert_thread = threading.Thread(target=self._inserter_thread)
        self.insert_thread.start()

    def do_chunk(self, body_stream_chunk):
        self.queue.put(body_stream_chunk)

    def _inserter_thread(self):
        try:
            src_format, stream = _byte_stream_to_stream(
                self.blocking_byte_stream())
            self.insert_result = self.repository._get_sink().insert_stream(
                stream, src_format, self.tokens)
            self.insert_ok = True
        except:
            self.insert_exception = sys.exc_info()
            self.insert_ok = False

    def blocking_byte_stream(self):
        while True:
            bytes = self.queue.get()
            if bytes is StopIteration:
                return
            else:
                yield bytes

    def do_end(self):
        self.queue.put(StopIteration)
        if self.insert_thread is not None:
            self.insert_thread.join()
        if not self.insert_ok:
            exc_info = self.insert_exception
            raise exc_info[0], exc_info[1], exc_info[2]
        write_group_tokens, missing_keys = self.insert_result
        if write_group_tokens or missing_keys:
            # bzip needed? missing keys should typically be a small set.
            # Should this be a streaming body response ?
            missing_keys = sorted(missing_keys)
            bytes = bencode.bencode((write_group_tokens, missing_keys))
            self.repository.unlock()
            return SuccessfulSmartServerResponse(('missing-basis', bytes))
        else:
            self.repository.unlock()
            return SuccessfulSmartServerResponse(('ok', ))
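
# A stripped-down sketch (hypothetical names) of the push-to-pull inversion
# implemented by the insert-stream verbs above: the request handler pushes
# chunks onto a Queue, and a worker thread pulls them back off as an ordinary
# bytes iterator, with StopIteration used as the end-of-stream sentinel.
class _SketchChunkPump(object):

    def __init__(self, consume):
        # consume is any callable that accepts a bytes iterator.
        self.queue = Queue.Queue()
        self.thread = threading.Thread(
            target=lambda: consume(self.iter_chunks()))
        self.thread.start()

    def iter_chunks(self):
        # Runs on the worker thread; blocks until the handler pushes a chunk.
        while True:
            chunk = self.queue.get()
            if chunk is StopIteration:
                return
            yield chunk

    def push(self, chunk):
        self.queue.put(chunk)

    def finish(self):
        self.queue.put(StopIteration)
        self.thread.join()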
class SmartServerRepositoryInsertStream_1_19(SmartServerRepositoryInsertStreamLocked):
    """Insert a record stream from a RemoteSink into a repository.

    Same as SmartServerRepositoryInsertStreamLocked, except:
     - the lock_token argument is optional
     - servers that implement this verb accept 'inventory-delta' records in the
       stream.
    """

    def do_repository_request(self, repository, resume_tokens, lock_token=None):
        """StreamSink.insert_stream for a remote repository."""
        SmartServerRepositoryInsertStreamLocked.do_repository_request(
            self, repository, resume_tokens, lock_token)
class SmartServerRepositoryInsertStream(SmartServerRepositoryInsertStreamLocked):
    """Insert a record stream from a RemoteSink into an unlocked repository.

    This is the same as SmartServerRepositoryInsertStreamLocked, except it
    takes no lock_tokens; i.e. it works with an unlocked (or lock-free, e.g.
    like pack format) repository.
    """

    def do_repository_request(self, repository, resume_tokens):
        """StreamSink.insert_stream for a remote repository."""
        repository.lock_write()
        self.do_insert_stream_request(repository, resume_tokens)
class SmartServerRepositoryStreamKnitDataForRevisions(SmartServerRepositoryRequest):
    """Bzr <= 1.1 streaming pull, buffers all data on server."""

    def do_repository_request(self, repository, *revision_ids):
        repository.lock_read()
        try:
            return self._do_repository_request(repository, revision_ids)
        finally:
            repository.unlock()

    def _do_repository_request(self, repository, revision_ids):
        stream = repository.get_data_stream_for_search(
            repository.revision_ids_to_search_result(set(revision_ids)))
        buffer = StringIO()
        pack = ContainerSerialiser()
        buffer.write(pack.begin())
        try:
            try:
                for name_tuple, bytes in stream:
                    buffer.write(pack.bytes_record(bytes, [name_tuple]))
            finally:
                # Undo the lock_read that happens once the iterator from
                # get_data_stream is started.
                repository.unlock()
        except errors.RevisionNotPresent, e:
            return FailedSmartServerResponse(('NoSuchRevision', e.revision_id))
        buffer.write(pack.end())
        return SuccessfulSmartServerResponse(('ok',), buffer.getvalue())
class SmartServerRepositoryStreamRevisionsChunked(SmartServerRepositoryRequest):
    """Bzr 1.1+ streaming pull."""

    def do_body(self, body_bytes):
        repository = self._repository
        repository.lock_read()
        try:
            search, error = self.recreate_search(repository, body_bytes)
            if error is not None:
                repository.unlock()
                return error
            stream = repository.get_data_stream_for_search(search.get_result())
        except:
            # On non-error, unlocking is done by the body stream handler.
            repository.unlock()
            raise
        return SuccessfulSmartServerResponse(('ok',),
            body_stream=self.body_stream(stream, repository))

    def body_stream(self, stream, repository):
        pack = ContainerSerialiser()
        yield pack.begin()
        try:
            for name_tuple, bytes in stream:
                yield pack.bytes_record(bytes, [name_tuple])
        except errors.RevisionNotPresent, e:
            # This shouldn't be able to happen, but as we don't buffer
            # everything it can in theory happen.
            repository.unlock()
            yield FailedSmartServerResponse(('NoSuchRevision', e.revision_id))
        else:
            repository.unlock()
            yield pack.end()