38
51
def do(self, path, *args):
    """Execute a repository request.

    All Repository requests take a path to the repository as their first
    argument.  The repository must be at the exact path given by the
    client - no searching is done.

    The actual logic is delegated to self.do_repository_request.

    :param client_path: The path for the repository as received from the
        client.
    :return: A SmartServerResponse from self.do_repository_request().
    """
    # Resolve the client-supplied path against the backing transport; no
    # probing/searching beyond the exact path is performed.
    transport = self.transport_from_client_path(path)
    bzrdir = BzrDir.open_from_transport(transport)
    # Save the repository for use with do_body.
    self._repository = bzrdir.open_repository()
    return self.do_repository_request(self._repository, *args)
70
def do_repository_request(self, repository, *args):
    """Override to provide an implementation for a verb."""
    # No-op for verbs that take bodies (None as a result indicates a body
    # is wanted)
    return None
76
def recreate_search(self, repository, search_bytes, discard_excess=False):
    """Recreate a search from its serialised form.

    :param discard_excess: If True, and the search refers to data we don't
        have, just silently accept that fact - the verb calling
        recreate_search trusts that clients will look for missing things
        they expected and get it from elsewhere.
    :return: A tuple (search_result, error_response); exactly one of the
        two is None.
    """
    lines = search_bytes.split('\n')
    if lines[0] == 'ancestry-of':
        # Remaining lines are the head revision ids of the ancestry.
        heads = lines[1:]
        search_result = graph.PendingAncestryResult(heads, repository)
        return search_result, None
    elif lines[0] == 'search':
        return self.recreate_search_from_recipe(repository, lines[1:],
            discard_excess=discard_excess)
    else:
        return (None, FailedSmartServerResponse(('BadSearch',)))
95
def recreate_search_from_recipe(self, repository, lines,
    discard_excess=False):
    """Recreate a specific revision search (vs a from-tip search).

    :param discard_excess: If True, and the search refers to data we don't
        have, just silently accept that fact - the verb calling
        recreate_search trusts that clients will look for missing things
        they expected and get it from elsewhere.
    :return: A tuple (search_result, error_response); exactly one of the
        two is None.
    """
    start_keys = set(lines[0].split(' '))
    exclude_keys = set(lines[1].split(' '))
    revision_count = int(lines[2])
    repository.lock_read()
    try:
        search = repository.get_graph()._make_breadth_first_searcher(
            start_keys)
        while True:
            try:
                next_revs = search.next()
            except StopIteration:
                break
            search.stop_searching_any(exclude_keys.intersection(next_revs))
        search_result = search.get_result()
        if (not discard_excess and
            search_result.get_recipe()[3] != revision_count):
            # we got back a different amount of data than expected, this
            # gets reported as NoSuchRevision, because less revisions
            # indicates missing revisions, and more should never happen as
            # the excludes list considers ghosts and ensures that ghost
            # filling races are not a problem.
            return (None, FailedSmartServerResponse(('NoSuchRevision',)))
        return (search_result, None)
    finally:
        repository.unlock()
131
class SmartServerRepositoryReadLocked(SmartServerRepositoryRequest):
    """Calls self.do_readlocked_repository_request."""

    def do_repository_request(self, repository, *args):
        """Read lock a repository for do_readlocked_repository_request."""
        repository.lock_read()
        try:
            return self.do_readlocked_repository_request(repository, *args)
        finally:
            # Always release the read lock, even if the verb raises.
            repository.unlock()
143
class SmartServerRepositoryGetParentMap(SmartServerRepositoryRequest):
    """Bzr 1.2+ - get parent data for revisions during a graph search."""

    # Subclasses may set this to True to suppress the breadth-first
    # read-ahead of extra parent data.
    no_extra_results = False

    def do_repository_request(self, repository, *revision_ids):
        """Get parent details for some revisions.

        All the parents for revision_ids are returned. Additionally up to 64KB
        of additional parent data found by performing a breadth first search
        from revision_ids is returned. The verb takes a body containing the
        current search state, see do_body for details.

        If 'include-missing:' is in revision_ids, ghosts encountered in the
        graph traversal for getting parent data are included in the result with
        a prefix of 'missing:'.

        :param repository: The repository to query in.
        :param revision_ids: The utf8 encoded revision_id to answer for.
        """
        self._revision_ids = revision_ids
        return None # Signal that we want a body.

    def do_body(self, body_bytes):
        """Process the current search state and perform the parent lookup.

        :return: A smart server response where the body contains an utf8
            encoded flattened list of the parents of the revisions (the same
            format as Repository.get_revision_graph) which has been bz2
            compressed.
        """
        repository = self._repository
        repository.lock_read()
        try:
            return self._do_repository_request(body_bytes)
        finally:
            repository.unlock()

    def _do_repository_request(self, body_bytes):
        repository = self._repository
        revision_ids = set(self._revision_ids)
        include_missing = 'include-missing:' in revision_ids
        if include_missing:
            revision_ids.remove('include-missing:')
        body_lines = body_bytes.split('\n')
        search_result, error = self.recreate_search_from_recipe(
            repository, body_lines)
        if error is not None:
            return error
        # TODO might be nice to start up the search again; but thats not
        # written or tested yet.
        client_seen_revs = set(search_result.get_keys())
        # Always include the requested ids.
        client_seen_revs.difference_update(revision_ids)

        lines = []
        repo_graph = repository.get_graph()
        result = {}
        queried_revs = set()
        size_so_far = 0
        next_revs = revision_ids
        first_loop_done = False
        while next_revs:
            queried_revs.update(next_revs)
            parent_map = repo_graph.get_parent_map(next_revs)
            current_revs = next_revs
            next_revs = set()
            for revision_id in current_revs:
                missing_rev = False
                parents = parent_map.get(revision_id)
                if parents is not None:
                    # adjust for the wire
                    if parents == (_mod_revision.NULL_REVISION,):
                        parents = ()
                    # prepare the next query
                    next_revs.update(parents)
                    encoded_id = revision_id
                else:
                    missing_rev = True
                    encoded_id = "missing:" + revision_id
                    parents = []
                if (revision_id not in client_seen_revs and
                    (not missing_rev or include_missing)):
                    # Client does not have this revision, give it to it.
                    # add parents to the result
                    result[encoded_id] = parents
                    # Approximate the serialized cost of this revision_id.
                    size_so_far += 2 + len(encoded_id) + sum(map(len, parents))
            # get all the directly asked for parents, and then flesh out to
            # 64K (compressed) or so. We do one level of depth at a time to
            # stay in sync with the client. The 250000 magic number is
            # estimated compression ratio taken from bzr.dev itself.
            if self.no_extra_results or (
                first_loop_done and size_so_far > 250000):
                next_revs = set()
            # don't query things we've already queried
            next_revs.difference_update(queried_revs)
            first_loop_done = True

        # sorting trivially puts lexographically similar revision ids together.
        for revision, parents in sorted(result.items()):
            lines.append(' '.join((revision, ) + tuple(parents)))
        return SuccessfulSmartServerResponse(
            ('ok', ), bz2.compress('\n'.join(lines)))
251
# NOTE(review): this span is corrupted extraction residue.  The method body of
# do_readlocked_repository_request is almost entirely missing, and the final
# `return SuccessfulSmartServerResponse(('ok', token))` line appears to belong
# to a different (lock-related) verb entirely — recover from version control.
class SmartServerRepositoryGetRevisionGraph(SmartServerRepositoryReadLocked):
253
def do_readlocked_repository_request(self, repository, revision_id):
57
254
"""Return the result of repository.get_revision_graph(revision_id).
256
Deprecated as of bzr 1.4, but supported for older clients.
59
258
:param repository: The repository to query in.
60
259
:param revision_id: The utf8 encoded revision_id to get a graph from.
61
260
:return: A smart server response where the body contains an utf8
176
407
# NOTE(review): the line below is presumably from a lock verb (token-returning),
# not from get_revision_graph — TODO confirm against the original file.
return SuccessfulSmartServerResponse(('ok', token))
410
class SmartServerRepositoryGetStream(SmartServerRepositoryRequest):
412
def do_repository_request(self, repository, to_network_name):
413
"""Get a stream for inserting into a to_format repository.
415
:param repository: The repository to stream from.
416
:param to_network_name: The network name of the format of the target
419
self._to_format = network_format_registry.get(to_network_name)
420
if self._should_fake_unknown():
421
return FailedSmartServerResponse(
422
('UnknownMethod', 'Repository.get_stream'))
423
return None # Signal that we want a body.
425
def _should_fake_unknown(self):
426
"""Return True if we should return UnknownMethod to the client.
428
This is a workaround for bugs in pre-1.19 clients that claim to
429
support receiving streams of CHK repositories. The pre-1.19 client
430
expects inventory records to be serialized in the format defined by
431
to_network_name, but in pre-1.19 (at least) that format definition
432
tries to use the xml5 serializer, which does not correctly handle
433
rich-roots. After 1.19 the client can also accept inventory-deltas
434
(which avoids this issue), and those clients will use the
435
Repository.get_stream_1.19 verb instead of this one.
436
So: if this repository is CHK, and the to_format doesn't match,
437
we should just fake an UnknownSmartMethod error so that the client
438
will fallback to VFS, rather than sending it a stream we know it
441
from_format = self._repository._format
442
to_format = self._to_format
443
if not from_format.supports_chks:
444
# Source not CHK: that's ok
446
if (to_format.supports_chks and
447
from_format.repository_class is to_format.repository_class and
448
from_format._serializer == to_format._serializer):
449
# Source is CHK, but target matches: that's ok
450
# (e.g. 2a->2a, or CHK2->2a)
452
# Source is CHK, and target is not CHK or incompatible CHK. We can't
453
# generate a compatible stream.
456
def do_body(self, body_bytes):
457
repository = self._repository
458
repository.lock_read()
460
search_result, error = self.recreate_search(repository, body_bytes,
462
if error is not None:
465
source = repository._get_source(self._to_format)
466
stream = source.get_stream(search_result)
468
exc_info = sys.exc_info()
470
# On non-error, unlocking is done by the body stream handler.
473
raise exc_info[0], exc_info[1], exc_info[2]
474
return SuccessfulSmartServerResponse(('ok',),
475
body_stream=self.body_stream(stream, repository))
477
def body_stream(self, stream, repository):
478
byte_stream = _stream_to_byte_stream(stream, repository._format)
480
for bytes in byte_stream:
482
except errors.RevisionNotPresent, e:
483
# This shouldn't be able to happen, but as we don't buffer
484
# everything it can in theory happen.
486
yield FailedSmartServerResponse(('NoSuchRevision', e.revision_id))
491
class SmartServerRepositoryGetStream_1_19(SmartServerRepositoryGetStream):

    def _should_fake_unknown(self):
        """Returns False; we don't need to workaround bugs in 1.19+ clients."""
        return False
498
def _stream_to_byte_stream(stream, src_format):
    """Convert a record stream to a self delimited byte stream."""
    pack_writer = pack.ContainerSerialiser()
    yield pack_writer.begin()
    # The first record names the source format so the receiver can decode.
    yield pack_writer.bytes_record(src_format.network_name(), '')
    for substream_type, substream in stream:
        for record in substream:
            if record.storage_kind in ('chunked', 'fulltext'):
                serialised = record_to_fulltext_bytes(record)
            elif record.storage_kind == 'absent':
                raise ValueError("Absent factory for %s" % (record.key,))
            else:
                serialised = record.get_bytes_as(record.storage_kind)
            if serialised:
                # Some streams embed the whole stream into the wire
                # representation of the first record, which means that
                # later records have no wire representation: we skip them.
                yield pack_writer.bytes_record(serialised, [(substream_type,)])
    yield pack_writer.end()
519
class _ByteStreamDecoder(object):
    """Helper for _byte_stream_to_stream.

    The expected usage of this class is via the function _byte_stream_to_stream
    which creates a _ByteStreamDecoder, pops off the stream format and then
    yields the output of record_stream(), the main entry point to
    _ByteStreamDecoder.

    This is complicated by wishing to return type, iterator_for_type, but
    getting the data for iterator_for_type when we find out type: we can't
    simply pass a generator down to the NetworkRecordStream parser, instead
    we have a little local state to seed each NetworkRecordStream instance,
    and gather the type that we'll be yielding.

    :ivar byte_stream: The byte stream being decoded.
    :ivar stream_decoder: A pack parser used to decode the bytestream
    :ivar current_type: The current type, used to join adjacent records of the
        same type into a single stream.
    :ivar first_bytes: The first bytes to give the next NetworkRecordStream.
    """

    def __init__(self, byte_stream, record_counter):
        """Create a _ByteStreamDecoder."""
        self.stream_decoder = pack.ContainerPushParser()
        self.current_type = None
        self.first_bytes = None
        self.byte_stream = byte_stream
        self._record_counter = record_counter
        # Running count of revision records seen, used for progress reporting.
        self.key_count = 0

    def iter_stream_decoder(self):
        """Iterate the contents of the pack from stream_decoder."""
        # dequeue pending items
        for record in self.stream_decoder.read_pending_records():
            yield record
        # Pull bytes of the wire, decode them to records, yield those records.
        for bytes in self.byte_stream:
            self.stream_decoder.accept_bytes(bytes)
            for record in self.stream_decoder.read_pending_records():
                yield record

    def iter_substream_bytes(self):
        if self.first_bytes is not None:
            yield self.first_bytes
            # If we run out of pack records, single the outer layer to stop.
            self.first_bytes = None
        for record in self.iter_pack_records:
            record_names, record_bytes = record
            record_name, = record_names
            substream_type = record_name[0]
            if substream_type != self.current_type:
                # end of a substream, seed the next substream.
                self.current_type = substream_type
                self.first_bytes = record_bytes
                return
            yield record_bytes

    def record_stream(self):
        """Yield substream_type, substream from the byte stream."""
        def wrap_and_count(pb, rc, substream):
            """Yield records from stream while showing progress."""
            counter = 0
            if rc:
                if self.current_type != 'revisions' and self.key_count != 0:
                    # As we know the number of revisions now (in self.key_count)
                    # we can setup and use record_counter (rc).
                    if not rc.is_initialized():
                        rc.setup(self.key_count, self.key_count)
            for record in substream.read():
                if rc:
                    if rc.is_initialized() and counter == rc.STEP:
                        rc.increment(counter)
                        pb.update('Estimate', rc.current, rc.max)
                        counter = 0
                    if self.current_type == 'revisions':
                        # Total records is proportional to number of revs
                        # to fetch. With remote, we used self.key_count to
                        # track the number of revs. Once we have the revs
                        # counts in self.key_count, the progress bar changes
                        # from 'Estimating..' to 'Estimate' above.
                        self.key_count += 1
                        if counter == rc.STEP:
                            pb.update('Estimating..', self.key_count)
                            counter = 0
                counter += 1
                yield record

        self.seed_state()
        pb = ui.ui_factory.nested_progress_bar()
        rc = self._record_counter
        # Make and consume sub generators, one per substream type:
        while self.first_bytes is not None:
            substream = NetworkRecordStream(self.iter_substream_bytes())
            # after substream is fully consumed, self.current_type is set to
            # the next type, and self.first_bytes is set to the matching bytes.
            yield self.current_type, wrap_and_count(pb, rc, substream)
        if rc:
            pb.update('Done', rc.max, rc.max)
        pb.finished()

    def seed_state(self):
        """Prepare the _ByteStreamDecoder to decode from the pack stream."""
        # Set a single generator we can use to get data from the pack stream.
        self.iter_pack_records = self.iter_stream_decoder()
        # Seed the very first subiterator with content; after this each one
        # seeds the next.
        list(self.iter_substream_bytes())
632
def _byte_stream_to_stream(byte_stream, record_counter=None):
    """Convert a byte stream into a format and a stream.

    :param byte_stream: A bytes iterator, as output by _stream_to_byte_stream.
    :return: (RepositoryFormat, stream_generator)
    """
    decoder = _ByteStreamDecoder(byte_stream, record_counter)
    # Consume bytes until the leading format record is decoded, then hand the
    # rest of the work to the decoder's record_stream().
    for bytes in byte_stream:
        decoder.stream_decoder.accept_bytes(bytes)
        for record in decoder.stream_decoder.read_pending_records(max=1):
            record_names, src_format_name = record
            src_format = network_format_registry.get(src_format_name)
            return src_format, decoder.record_stream()
179
647
# NOTE(review): corrupted span.  The SmartServerRepositoryUnlock verb body is
# missing after its `def` line, and the remaining lines belong to an unrelated
# tarball-building helper; both sides of an assert->raise diff are present.
# Recover the full definitions from version control before editing.
class SmartServerRepositoryUnlock(SmartServerRepositoryRequest):
181
649
def do_repository_request(self, repository, token):
241
719
# tarfile member names must be filesystem-encoded byte strings here
dirname = dirname.encode(sys.getfilesystemencoding())
242
720
# python's tarball module includes the whole path by default so
244
# NOTE(review): old diff side — superseded by the explicit raise below
assert dirname.endswith('.bzr')
722
if not dirname.endswith('.bzr'):
723
raise ValueError(dirname)
245
724
tarball.add(dirname, '.bzr') # recursive by default
250
class SmartServerRepositoryStreamKnitDataForRevisions(SmartServerRepositoryRequest):
252
def do_repository_request(self, repository, *revision_ids):
253
repository.lock_read()
255
return self._do_repository_request(repository, revision_ids)
259
def _do_repository_request(self, repository, revision_ids):
260
stream = repository.get_data_stream(revision_ids)
261
filelike = StringIO()
262
pack = ContainerWriter(filelike.write)
265
for name_tuple, bytes in stream:
266
pack.add_bytes_record(bytes, [name_tuple])
267
except errors.RevisionNotPresent, e:
268
return FailedSmartServerResponse(('NoSuchRevision', e.revision_id))
270
return SuccessfulSmartServerResponse(('ok',), filelike.getvalue())
729
class SmartServerRepositoryInsertStreamLocked(SmartServerRepositoryRequest):
730
"""Insert a record stream from a RemoteSink into a repository.
732
This gets bytes pushed to it by the network infrastructure and turns that
733
into a bytes iterator using a thread. That is then processed by
734
_byte_stream_to_stream.
739
def do_repository_request(self, repository, resume_tokens, lock_token):
740
"""StreamSink.insert_stream for a remote repository."""
741
repository.lock_write(token=lock_token)
742
self.do_insert_stream_request(repository, resume_tokens)
744
def do_insert_stream_request(self, repository, resume_tokens):
745
tokens = [token for token in resume_tokens.split(' ') if token]
747
self.repository = repository
748
self.queue = Queue.Queue()
749
self.insert_thread = threading.Thread(target=self._inserter_thread)
750
self.insert_thread.start()
752
def do_chunk(self, body_stream_chunk):
753
self.queue.put(body_stream_chunk)
755
def _inserter_thread(self):
757
src_format, stream = _byte_stream_to_stream(
758
self.blocking_byte_stream())
759
self.insert_result = self.repository._get_sink().insert_stream(
760
stream, src_format, self.tokens)
761
self.insert_ok = True
763
self.insert_exception = sys.exc_info()
764
self.insert_ok = False
766
def blocking_byte_stream(self):
768
bytes = self.queue.get()
769
if bytes is StopIteration:
775
self.queue.put(StopIteration)
776
if self.insert_thread is not None:
777
self.insert_thread.join()
778
if not self.insert_ok:
779
exc_info = self.insert_exception
780
raise exc_info[0], exc_info[1], exc_info[2]
781
write_group_tokens, missing_keys = self.insert_result
782
if write_group_tokens or missing_keys:
783
# bzip needed? missing keys should typically be a small set.
784
# Should this be a streaming body response ?
785
missing_keys = sorted(missing_keys)
786
bytes = bencode.bencode((write_group_tokens, missing_keys))
787
self.repository.unlock()
788
return SuccessfulSmartServerResponse(('missing-basis', bytes))
790
self.repository.unlock()
791
return SuccessfulSmartServerResponse(('ok', ))
794
class SmartServerRepositoryInsertStream_1_19(SmartServerRepositoryInsertStreamLocked):
    """Insert a record stream from a RemoteSink into a repository.

    Same as SmartServerRepositoryInsertStreamLocked, except:
     - the lock token argument is optional
     - servers that implement this verb accept 'inventory-delta' records in the
       stream.
    """

    def do_repository_request(self, repository, resume_tokens, lock_token=None):
        """StreamSink.insert_stream for a remote repository."""
        SmartServerRepositoryInsertStreamLocked.do_repository_request(
            self, repository, resume_tokens, lock_token)
811
class SmartServerRepositoryInsertStream(SmartServerRepositoryInsertStreamLocked):
    """Insert a record stream from a RemoteSink into an unlocked repository.

    This is the same as SmartServerRepositoryInsertStreamLocked, except it
    takes no lock_tokens; i.e. it works with an unlocked (or lock-free, e.g.
    like pack format) repository.
    """

    def do_repository_request(self, repository, resume_tokens):
        """StreamSink.insert_stream for a remote repository."""
        # No lock token: take an ordinary write lock for the insert.
        repository.lock_write()
        self.do_insert_stream_request(repository, resume_tokens)