~bzr-pqm/bzr/bzr.dev

3376.2.12 by Martin Pool
pyflakes corrections (thanks spiv)
1
# Copyright (C) 2005, 2006, 2007, 2008 Canonical Ltd
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
from bzrlib.lazy_import import lazy_import
18
lazy_import(globals(), """
19
from itertools import izip
20
import md5
2592.3.91 by Robert Collins
Incrementally closing in on a correct fetch for packs.
21
import time
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
22
23
from bzrlib import (
3603.2.1 by Andrew Bennetts
Remove duplicated class definitions, remove unused imports.
24
    debug,
25
    graph,
26
    pack,
27
    transactions,
28
    ui,
29
    )
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
30
from bzrlib.index import (
31
    GraphIndex,
32
    GraphIndexBuilder,
33
    InMemoryGraphIndex,
34
    CombinedGraphIndex,
35
    GraphIndexPrefixAdapter,
36
    )
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
37
from bzrlib.knit import (
38
    KnitPlainFactory,
39
    KnitVersionedFiles,
40
    _KnitGraphIndex,
41
    _DirectPackAccess,
42
    )
43
from bzrlib.osutils import rand_chars, split_lines
3063.2.1 by Robert Collins
Solve reconciling erroring when multiple portions of a single delta chain are being reinserted.
44
from bzrlib import tsort
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
45
""")
46
from bzrlib import (
47
    bzrdir,
48
    errors,
49
    lockable_files,
50
    lockdir,
3099.3.3 by John Arbash Meinel
Deprecate get_parents() in favor of get_parent_map()
51
    symbol_versioning,
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
52
    xml5,
2996.2.11 by Aaron Bentley
Implement rich-root-pack format ( #164639)
53
    xml6,
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
54
    xml7,
55
    )
56
3603.2.1 by Andrew Bennetts
Remove duplicated class definitions, remove unused imports.
57
from bzrlib.decorators import needs_write_lock
2592.3.166 by Robert Collins
Merge KnitRepository3 removal branch.
58
from bzrlib.repofmt.knitrepo import KnitRepository
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
59
from bzrlib.repository import (
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
60
    CommitBuilder,
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
61
    MetaDirRepositoryFormat,
3376.2.12 by Martin Pool
pyflakes corrections (thanks spiv)
62
    RepositoryFormat,
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
63
    RootCommitBuilder,
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
64
    )
65
import bzrlib.revision as _mod_revision
3376.2.12 by Martin Pool
pyflakes corrections (thanks spiv)
66
from bzrlib.trace import (
67
    mutter,
68
    warning,
69
    )
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
70
71
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
72
class PackCommitBuilder(CommitBuilder):
    """CommitBuilder variant that adds texts with pack semantics.

    A single knit object is used instead of one knit object for every
    added text, which reduces memory and object pressure.
    """

    def __init__(self, repository, parents, config, timestamp=None,
                 timezone=None, committer=None, revprops=None,
                 revision_id=None):
        """Initialise the builder and a per-file graph.

        All arguments are handed unchanged to CommitBuilder.__init__; the
        graph is built over the combined text index of the repository's
        pack collection.
        """
        CommitBuilder.__init__(
            self, repository, parents, config,
            timestamp=timestamp,
            timezone=timezone,
            committer=committer,
            revprops=revprops,
            revision_id=revision_id)
        text_index = repository._pack_collection.text_index
        self._file_graph = graph.Graph(text_index.combined_index)

    def _heads(self, file_id, revision_ids):
        """Return the set of revision ids that are heads for file_id.

        :param file_id: The file whose per-file graph is consulted.
        :param revision_ids: Candidate revision ids for that file.
        """
        # The text index is keyed by (file_id, revision_id) tuples.
        candidate_keys = []
        for revision_id in revision_ids:
            candidate_keys.append((file_id, revision_id))
        head_keys = self._file_graph.heads(candidate_keys)
        # Strip the file id again; callers only want revision ids.
        return set(head_key[1] for head_key in head_keys)
91
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
92
93
class PackRootCommitBuilder(RootCommitBuilder):
    """A subclass of RootCommitBuilder to add texts with pack semantics.

    Specifically this uses one knit object rather than one knit object per
    added text, reducing memory and object pressure.
    """

    def __init__(self, repository, parents, config, timestamp=None,
                 timezone=None, committer=None, revprops=None,
                 revision_id=None):
        """Initialise the builder and a per-file graph.

        All arguments are handed unchanged to the base class initialiser;
        the graph is built over the combined text index of the repository's
        pack collection.
        """
        # Initialise through the direct base class so that any set-up done
        # by RootCommitBuilder is not bypassed.
        RootCommitBuilder.__init__(self, repository, parents, config,
            timestamp=timestamp, timezone=timezone, committer=committer,
            revprops=revprops, revision_id=revision_id)
        self._file_graph = graph.Graph(
            repository._pack_collection.text_index.combined_index)

    def _heads(self, file_id, revision_ids):
        """Return the set of revision ids that are heads for file_id.

        :param file_id: The file whose per-file graph is consulted.
        :param revision_ids: Candidate revision ids for that file.
        """
        # The text index is keyed by (file_id, revision_id) tuples.
        keys = [(file_id, revision_id) for revision_id in revision_ids]
        return set([key[1] for key in self._file_graph.heads(keys)])
112
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
113
2592.3.90 by Robert Collins
Slightly broken, but branch and fetch performance is now roughly on par (for bzr.dev) with knits - should be much faster for large repos.
114
class Pack(object):
    """An in memory proxy for a pack and its indices.

    This is a base class that is not directly used, instead the classes
    ExistingPack and NewPack are used.  Those subclasses are responsible for
    setting the ``name`` and ``pack_transport`` attributes that file_name()
    and access_tuple() read.
    """

    # A map of index 'type' to the file extension and the position of that
    # index in an index_sizes sequence.  Defined on the base class so that
    # the *_index_name helpers below work for every kind of pack.
    index_definitions = {
        'revision': ('.rix', 0),
        'inventory': ('.iix', 1),
        'text': ('.tix', 2),
        'signature': ('.six', 3),
        }

    def __init__(self, revision_index, inventory_index, text_index,
        signature_index):
        """Create a pack instance.

        :param revision_index: A GraphIndex for determining what revisions are
            present in the Pack and accessing the locations of their texts.
        :param inventory_index: A GraphIndex for determining what inventories are
            present in the Pack and accessing the locations of their
            texts/deltas.
        :param text_index: A GraphIndex for determining what file texts
            are present in the pack and accessing the locations of their
            texts/deltas (via (fileid, revisionid) tuples).
        :param signature_index: A GraphIndex for determining what signatures are
            present in the Pack and accessing the locations of their texts.
        """
        self.revision_index = revision_index
        self.inventory_index = inventory_index
        self.text_index = text_index
        self.signature_index = signature_index

    def access_tuple(self):
        """Return a tuple (transport, name) for the pack content."""
        return self.pack_transport, self.file_name()

    def file_name(self):
        """Get the file name for the pack on disk."""
        return self.name + '.pack'

    def get_revision_count(self):
        """Return the number of keys in the revision index."""
        return self.revision_index.key_count()

    def index_name(self, index_type, name):
        """Get the disk name of an index type for pack name 'name'."""
        return name + self.index_definitions[index_type][0]

    def index_offset(self, index_type):
        """Get the position in a index_size array for a given index type."""
        return self.index_definitions[index_type][1]

    def inventory_index_name(self, name):
        """The inv index is the name + .iix."""
        return self.index_name('inventory', name)

    def revision_index_name(self, name):
        """The revision index is the name + .rix."""
        return self.index_name('revision', name)

    def signature_index_name(self, name):
        """The signature index is the name + .six."""
        return self.index_name('signature', name)

    def text_index_name(self, name):
        """The text index is the name + .tix."""
        return self.index_name('text', name)

    def _external_compression_parents_of_texts(self):
        """Return text references that this pack's text index cannot satisfy.

        Collects the second reference list of every text index entry and
        returns, as a set, the referenced keys that are not themselves keys
        of the text index.
        """
        keys = set()
        refs = set()
        for node in self.text_index.iter_all_entries():
            # node[1] is the entry's key; node[3] holds its reference lists.
            keys.add(node[1])
            refs.update(node[3][1])
        return refs - keys
175
2592.3.191 by Robert Collins
Give Pack responsibility for index naming, and two concrete classes - NewPack for new packs and ExistingPack for packs we read from disk.
176
177
class ExistingPack(Pack):
    """An in memory proxy for an existing .pack and its disk indices."""

    def __init__(self, pack_transport, name, revision_index, inventory_index,
        text_index, signature_index):
        """Create an ExistingPack object.

        :param pack_transport: The transport where the pack file resides.
        :param name: The name of the pack on disk in the pack_transport.
        :param revision_index: Passed through to Pack.__init__.
        :param inventory_index: Passed through to Pack.__init__.
        :param text_index: Passed through to Pack.__init__.
        :param signature_index: Passed through to Pack.__init__.
        :raises AssertionError: If any of the arguments is None.
        """
        Pack.__init__(self, revision_index, inventory_index, text_index,
            signature_index)
        self.name = name
        self.pack_transport = pack_transport
        # All parameters are mandatory for an existing pack.
        if None in (revision_index, inventory_index, text_index,
                signature_index, name, pack_transport):
            raise AssertionError()

    def __eq__(self, other):
        """Two packs are equal when all of their attributes are equal."""
        try:
            other_state = other.__dict__
        except AttributeError:
            # Objects without instance attributes (None, numbers, ...) are
            # simply not comparable; let Python fall back to its default.
            return NotImplemented
        return self.__dict__ == other_state

    def __ne__(self, other):
        """Inverse of __eq__, preserving NotImplemented."""
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __repr__(self):
        """Describe the pack by identity, transport and name."""
        return "<bzrlib.repofmt.pack_repo.Pack object at 0x%x, %s, %s>" % (
            id(self), self.pack_transport, self.name)
2592.3.90 by Robert Collins
Slightly broken, but branch and fetch performance is now roughly on par (for bzr.dev) with knits - should be much faster for large repos.
204
205
2592.3.191 by Robert Collins
Give Pack responsibility for index naming, and two concrete classes - NewPack for new packs and ExistingPack for packs we read from disk.
206
class NewPack(Pack):
    """An in memory proxy for a pack which is being created."""

    # A map of index 'type' to the file extension and position in the
    # index_sizes array.
    index_definitions = {
        'revision': ('.rix', 0),
        'inventory': ('.iix', 1),
        'text': ('.tix', 2),
        'signature': ('.six', 3),
        }

    def __init__(self, upload_transport, index_transport, pack_transport,
        upload_suffix='', file_mode=None):
        """Create a NewPack instance.

        :param upload_transport: A writable transport for the pack to be
            incrementally uploaded to.
        :param index_transport: A writable transport for the pack's indices to
            be written to when the pack is finished.
        :param pack_transport: A writable transport for the pack to be renamed
            to when the upload is complete. This *must* be the same as
            upload_transport.clone('../packs').
        :param upload_suffix: An optional suffix to be given to any temporary
            files created during the pack creation. e.g '.autopack'
        :param file_mode: An optional file mode to create the new files with.
        """
        # The relative locations of the packs are constrained, but all are
        # passed in because the caller has them, so as to avoid object churn.
        Pack.__init__(self,
            # Revisions: parents list, no text compression.
            InMemoryGraphIndex(reference_lists=1),
            # Inventory: We want to map compression only, but currently the
            # knit code hasn't been updated enough to understand that, so we
            # have a regular 2-list index giving parents and compression
            # source.
            InMemoryGraphIndex(reference_lists=2),
            # Texts: compression and per file graph, for all fileids - so two
            # reference lists and two elements in the key tuple.
            InMemoryGraphIndex(reference_lists=2, key_elements=2),
            # Signatures: Just blobs to store, no compression, no parents
            # listing.
            InMemoryGraphIndex(reference_lists=0),
            )
        # where should the new pack be opened
        self.upload_transport = upload_transport
        # where are indices written out to
        self.index_transport = index_transport
        # where is the pack renamed to when it is finished?
        self.pack_transport = pack_transport
        # What file mode to upload the pack and indices with.
        self._file_mode = file_mode
        # tracks the content written to the .pack file.
        self._hash = md5.new()
        # a sequence with the length in bytes of the four indices, once the
        # pack is finalised. (rev, inv, text, sigs)
        self.index_sizes = None
        # How much data to cache when writing packs. Note that this is not
        # synchronised with reads, because it's not in the transport layer, so
        # is not safe unless the client knows it won't be reading from the pack
        # under creation.
        self._cache_limit = 0
        # the temporary pack file name.
        self.random_name = rand_chars(20) + upload_suffix
        # when was this pack started ?
        self.start_time = time.time()
        # open an output stream for the data added to the pack.
        self.write_stream = self.upload_transport.open_write_stream(
            self.random_name, mode=self._file_mode)
        if 'pack' in debug.debug_flags:
            mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
                time.ctime(), self.upload_transport.base, self.random_name,
                time.time() - self.start_time)
        # A list of byte sequences to be written to the new pack, and the
        # aggregate size of them.  Stored as a list rather than separate
        # variables so that the _write_data closure below can update them.
        self._buffer = [[], 0]
        # create a callable for adding data
        #
        # robertc says- this is a closure rather than a method on the object
        # so that the variables are locals, and faster than accessing object
        # members.
        def _write_data(bytes, flush=False, _buffer=self._buffer,
            _write=self.write_stream.write, _update=self._hash.update):
            _buffer[0].append(bytes)
            _buffer[1] += len(bytes)
            # buffer cap: write out once more than _cache_limit bytes are
            # pending, or when the caller forces a flush.
            if _buffer[1] > self._cache_limit or flush:
                bytes = ''.join(_buffer[0])
                _write(bytes)
                _update(bytes)
                # Reset in place so self._buffer sees the same list object.
                _buffer[:] = [[], 0]
        # expose this on self, for the occasion when clients want to add data.
        self._write_data = _write_data
        # a pack writer object to serialise pack records.
        self._writer = pack.ContainerWriter(self._write_data)
        self._writer.begin()
        # what state is the pack in? (open, finished, aborted)
        self._state = 'open'

    def abort(self):
        """Cancel creating this pack.

        The write stream is closed and the temporary upload file is deleted.
        """
        self._state = 'aborted'
        try:
            # Close explicitly before deleting the file.
            self.write_stream.close()
        finally:
            # Remove the temporary pack file even when close() failed, so
            # that an aborted pack does not leave it behind.
            self.upload_transport.delete(self.random_name)
        # The indices have no state on disk.

    def access_tuple(self):
        """Return a tuple (transport, name) for the pack content.

        :raises AssertionError: If the pack is neither open nor finished.
        """
        if self._state == 'finished':
            return Pack.access_tuple(self)
        elif self._state == 'open':
            # Not yet renamed into place: content is in the upload area.
            return self.upload_transport, self.random_name
        else:
            raise AssertionError(self._state)

    def data_inserted(self):
        """True if data has been added to this pack."""
        return bool(self.get_revision_count() or
            self.inventory_index.key_count() or
            self.text_index.key_count() or
            self.signature_index.key_count())

    def finish(self):
        """Finish the new pack.

        This:
         - finalises the content
         - assigns a name (the md5 of the content, currently)
         - writes out the associated indices
         - renames the pack into place.
         - stores the index size tuple for the pack in the index_sizes
           attribute.
        """
        self._writer.end()
        # Push out anything still buffered so the hash covers all content.
        self.flush()
        self.name = self._hash.hexdigest()
        # write indices
        # XXX: It'd be better to write them all to temporary names, then
        # rename them all into place, so that the window when only some are
        # visible is smaller.  On the other hand none will be seen until
        # they're in the names list.
        self.index_sizes = [None, None, None, None]
        self._write_index('revision', self.revision_index, 'revision')
        self._write_index('inventory', self.inventory_index, 'inventory')
        self._write_index('text', self.text_index, 'file texts')
        self._write_index('signature', self.signature_index,
            'revision signatures')
        self.write_stream.close()
        # Note that this will clobber an existing pack with the same name,
        # without checking for hash collisions. While this is undesirable this
        # is something that can be rectified in a subsequent release. One way
        # to rectify it may be to leave the pack at the original name, writing
        # its pack-names entry as something like 'HASH: index-sizes
        # temporary-name'. Allocate that and check for collisions, if it is
        # collision free then rename it into place. If clients know this scheme
        # they can handle missing-file errors by:
        #  - try for HASH.pack
        #  - try for temporary-name
        #  - refresh the pack-list to see if the pack is now absent
        self.upload_transport.rename(self.random_name,
                '../packs/' + self.file_name())
        self._state = 'finished'
        if 'pack' in debug.debug_flags:
            # XXX: size might be interesting?
            mutter('%s: create_pack: pack renamed into place: %s%s->%s%s t+%6.3fs',
                time.ctime(), self.upload_transport.base, self.random_name,
                self.pack_transport, self.name,
                time.time() - self.start_time)

    def flush(self):
        """Flush any current data."""
        if self._buffer[1]:
            pending = ''.join(self._buffer[0])
            self.write_stream.write(pending)
            self._hash.update(pending)
            # Reset in place: the _write_data closure holds the same list.
            self._buffer[:] = [[], 0]

    def index_name(self, index_type, name):
        """Get the disk name of an index type for pack name 'name'."""
        return name + NewPack.index_definitions[index_type][0]

    def index_offset(self, index_type):
        """Get the position in a index_size array for a given index type."""
        return NewPack.index_definitions[index_type][1]

    def _replace_index_with_readonly(self, index_type):
        """Swap the in-memory index of index_type for a GraphIndex.

        The GraphIndex is opened on index_transport using the index's disk
        name and the size recorded in index_sizes.
        """
        setattr(self, index_type + '_index',
            GraphIndex(self.index_transport,
                self.index_name(index_type, self.name),
                self.index_sizes[self.index_offset(index_type)]))

    def set_write_cache_size(self, size):
        """Set how many bytes may be buffered before data is written out."""
        self._cache_limit = size

    def _write_index(self, index_type, index, label):
        """Write out an index.

        :param index_type: The type of index to write - e.g. 'revision'.
        :param index: The index object to serialise.
        :param label: What label to give the index e.g. 'revision'.
        """
        index_name = self.index_name(index_type, self.name)
        # The value returned by put_file is recorded as the index's size.
        self.index_sizes[self.index_offset(index_type)] = \
            self.index_transport.put_file(index_name, index.finish(),
            mode=self._file_mode)
        if 'pack' in debug.debug_flags:
            # XXX: size might be interesting?
            mutter('%s: create_pack: wrote %s index: %s%s t+%6.3fs',
                time.ctime(), label, self.upload_transport.base,
                self.random_name, time.time() - self.start_time)
        # Replace the writable index on this object with a readonly,
        # presently unloaded index. We should alter
        # the index layer to make its finish() error if add_node is
        # subsequently used. RBC
        self._replace_index_with_readonly(index_type)
2592.3.195 by Robert Collins
Move some inventory index logic to NewPack.
424
2592.3.191 by Robert Collins
Give Pack responsibility for index naming, and two concrete classes - NewPack for new packs and ExistingPack for packs we read from disk.
425
2592.3.208 by Robert Collins
Start refactoring the knit-pack thunking to be clearer.
426
class AggregateIndex(object):
    """An aggregated index for the RepositoryPackCollection.

    AggregateIndex is responsible for managing the pack access object, the
    index-to-pack mapping, and the list of all indices for one specific type
    of index such as 'revision index'.

    A CombinedGraphIndex provides an index on a single key space built up
    from several on-disk indices.  The AggregateIndex builds on this
    to provide a knit access layer, and allows having up to one writable
    index within the collection.
    """
    # XXX: Probably 'can be written to' could/should be separated from 'acts
    # like a knit index' -- mbp 20071024

    def __init__(self):
        """Create an AggregateIndex with no indices and no writable index."""
        # The very same dict instance is handed to the data access object,
        # which is why later updates mutate it in place instead of rebinding.
        pack_map = {}
        self.index_to_pack = pack_map
        self.combined_index = CombinedGraphIndex([])
        self.data_access = _DirectPackAccess(pack_map)
        # add_nodes of the writable index, or None when there is none.
        self.add_callback = None

    def replace_indices(self, index_to_pack, indices):
        """Replace the current mappings with fresh ones.

        This should probably not be used eventually, rather incremental add
        and removal of indices. It has been added during refactoring of
        existing code.

        :param index_to_pack: A mapping from index objects to
            (transport, name) tuples for the pack file data.
        :param indices: A list of indices.
        """
        # Refresh the pack map dict without replacing the instance.
        pack_map = self.index_to_pack
        pack_map.clear()
        pack_map.update(index_to_pack)
        # XXX: API break - clearly a 'replace' method would be good?
        self.combined_index._indices[:] = indices
        # Forget the add nodes callback of any previous writable index.
        self.add_callback = None

    def add_index(self, index, pack):
        """Add index to the aggregate, which is an index for Pack pack.

        Future searches on the aggregate index will search this new index
        before all previously inserted indices.

        :param index: An Index for the pack.
        :param pack: A Pack instance.
        """
        # Expose it to the index map.
        access_details = pack.access_tuple()
        self.index_to_pack[index] = access_details
        # Put it at the front of the linear index list.
        self.combined_index.insert_index(0, index)

    def add_writable_index(self, index, pack):
        """Add an index which is able to have data added to it.

        There can be at most one writable index at any time.  Any
        modifications made to the knit are put into this index.

        :param index: An index from the pack parameter.
        :param pack: A Pack instance.
        :raises AssertionError: If a writable index is already registered.
        """
        current_callback = self.add_callback
        if current_callback is not None:
            raise AssertionError(
                "%s already has a writable index through %s" %
                (self, current_callback))
        # Allow writing: queue writes to a new index. This updates the index
        # to packs mapping as a side effect.
        self.add_index(index, pack)
        self.data_access.set_writer(pack._writer, index, pack.access_tuple())
        self.add_callback = index.add_nodes

    def clear(self):
        """Reset all the aggregate data to nothing."""
        self.data_access.set_writer(None, None, (None, None))
        self.index_to_pack.clear()
        # Empty the list in place so the same list instance stays in use.
        del self.combined_index._indices[:]
        self.add_callback = None

    def remove_index(self, index, pack):
        """Remove index from the indices used to answer queries.

        If index is the current writable index, the writer is reset as well.

        :param index: An index from the pack parameter.
        :param pack: A Pack instance.
        """
        del self.index_to_pack[index]
        self.combined_index._indices.remove(index)
        current_callback = self.add_callback
        if current_callback is None:
            # No writable index is registered, nothing more to undo.
            return
        if getattr(index, 'add_nodes', None) == current_callback:
            # The removed index was the writable one: stop writing to it.
            self.add_callback = None
            self.data_access.set_writer(None, None, (None, None))
2592.3.209 by Robert Collins
Revision index management looking sane for packs.
520
2592.3.208 by Robert Collins
Start refactoring the knit-pack thunking to be clearer.
521
2951.1.2 by Robert Collins
Partial refactoring of pack_repo to create a Packer object for packing.
522
class Packer(object):
523
    """Create a pack from packs."""
524
525
    def __init__(self, pack_collection, packs, suffix, revision_ids=None):
2951.1.3 by Robert Collins
Partial support for native reconcile with packs.
526
        """Create a Packer.
527
528
        :param pack_collection: A RepositoryPackCollection object where the
529
            new pack is being written to.
530
        :param packs: The packs to combine.
531
        :param suffix: The suffix to use on the temporary files for the pack.
532
        :param revision_ids: Revision ids to limit the pack to.
533
        """
2951.1.2 by Robert Collins
Partial refactoring of pack_repo to create a Packer object for packing.
534
        self.packs = packs
535
        self.suffix = suffix
536
        self.revision_ids = revision_ids
2951.2.1 by Robert Collins
Factor out revision text copying in Packer to a single helper method.
537
        # The pack object we are creating.
538
        self.new_pack = None
2951.1.2 by Robert Collins
Partial refactoring of pack_repo to create a Packer object for packing.
539
        self._pack_collection = pack_collection
2951.2.1 by Robert Collins
Factor out revision text copying in Packer to a single helper method.
540
        # The index layer keys for the revisions being copied. None for 'all
541
        # objects'.
542
        self._revision_keys = None
2951.2.2 by Robert Collins
Factor out inventory text copying in Packer to a single helper method.
543
        # What text keys to copy. None for 'all texts'. This is set by
544
        # _copy_inventory_texts
545
        self._text_filter = None
2951.2.9 by Robert Collins
* ``pack-0.92`` repositories can now be reconciled.
546
        self._extra_init()
547
548
    def _extra_init(self):
549
        """A template hook to allow extending the constructor trivially."""
2951.1.2 by Robert Collins
Partial refactoring of pack_repo to create a Packer object for packing.
550
2951.1.3 by Robert Collins
Partial support for native reconcile with packs.
551
    def pack(self, pb=None):
2592.3.90 by Robert Collins
Slightly broken, but branch and fetch performance is now roughly on par (for bzr.dev) with knits - should be much faster for large repos.
552
        """Create a new pack by reading data from other packs.
553
554
        This does little more than a bulk copy of data. One key difference
555
        is that data with the same item key across multiple packs is elided
556
        from the output. The new pack is written into the current pack store
557
        along with its indices, and the name added to the pack names. The 
2592.3.182 by Robert Collins
Eliminate the need to use a transport,name tuple to represent a pack during fetch.
558
        source packs are not altered and are not required to be in the current
559
        pack collection.
2592.3.90 by Robert Collins
Slightly broken, but branch and fetch performance is now roughly on par (for bzr.dev) with knits - should be much faster for large repos.
560
2951.1.3 by Robert Collins
Partial support for native reconcile with packs.
561
        :param pb: An optional progress bar to use. A nested bar is created if
562
            this is None.
2592.3.91 by Robert Collins
Incrementally closing in on a correct fetch for packs.
563
        :return: A Pack object, or None if nothing was copied.
2592.3.90 by Robert Collins
Slightly broken, but branch and fetch performance is now roughly on par (for bzr.dev) with knits - should be much faster for large repos.
564
        """
565
        # open a pack - using the same name as the last temporary file
566
        # - which has already been flushed, so its safe.
567
        # XXX: - duplicate code warning with start_write_group; fix before
568
        #      considering 'done'.
2951.1.2 by Robert Collins
Partial refactoring of pack_repo to create a Packer object for packing.
569
        if self._pack_collection._new_pack is not None:
2592.3.194 by Robert Collins
Output the revision index from NewPack.finish
570
            raise errors.BzrError('call to create_pack_from_packs while '
571
                'another pack is being written.')
2951.1.2 by Robert Collins
Partial refactoring of pack_repo to create a Packer object for packing.
572
        if self.revision_ids is not None:
573
            if len(self.revision_ids) == 0:
2947.1.3 by Robert Collins
Unbreak autopack. Doh.
574
                # silly fetch request.
575
                return None
576
            else:
2951.1.2 by Robert Collins
Partial refactoring of pack_repo to create a Packer object for packing.
577
                self.revision_ids = frozenset(self.revision_ids)
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
578
                self.revision_keys = frozenset((revid,) for revid in
579
                    self.revision_ids)
2951.1.3 by Robert Collins
Partial support for native reconcile with packs.
580
        if pb is None:
581
            self.pb = ui.ui_factory.nested_progress_bar()
582
        else:
583
            self.pb = pb
2592.6.11 by Robert Collins
* A progress bar has been added for knitpack -> knitpack fetching.
584
        try:
2951.1.2 by Robert Collins
Partial refactoring of pack_repo to create a Packer object for packing.
585
            return self._create_pack_from_packs()
2592.6.11 by Robert Collins
* A progress bar has been added for knitpack -> knitpack fetching.
586
        finally:
2951.1.3 by Robert Collins
Partial support for native reconcile with packs.
587
            if pb is None:
588
                self.pb.finished()
2951.1.2 by Robert Collins
Partial refactoring of pack_repo to create a Packer object for packing.
589
590
    def open_pack(self):
        """Open a pack for the pack we are creating.

        :return: A NewPack using the collection's upload, index and pack
            transports, self.suffix as the upload suffix, and the file mode
            reported by the repository's bzrdir.
        """
        collection = self._pack_collection
        return NewPack(
            collection._upload_transport,
            collection._index_transport,
            collection._pack_transport,
            upload_suffix=self.suffix,
            file_mode=collection.repo.bzrdir._get_file_mode())
2951.1.2 by Robert Collins
Partial refactoring of pack_repo to create a Packer object for packing.
596
2951.2.1 by Robert Collins
Factor out revision text copying in Packer to a single helper method.
597
    def _copy_revision_texts(self):
        """Copy revision data to the new pack.

        Sets self._revision_keys to the index keys of the copied revisions,
        or to None when no revision ids were requested.
        """
        collection = self._pack_collection
        target = self.new_pack
        # select revisions
        if self.revision_ids:
            revision_keys = [(revision_id,)
                for revision_id in self.revision_ids]
        else:
            revision_keys = None
        # select revision keys
        revision_index_map = \
            collection._packs_list_to_pack_map_and_index_list(
                self.packs, 'revision_index')[0]
        revision_nodes = collection._index_contents(
            revision_index_map, revision_keys)
        # copy revision keys and adjust values
        self.pb.update("Copying revision texts", 1)
        total_items, readv_group_iter = self._revision_node_readv(
            revision_nodes)
        # _copy_nodes_graph is a generator: exhaust it so the copy happens.
        list(self._copy_nodes_graph(revision_index_map, target._writer,
            target.revision_index, readv_group_iter, total_items))
        if 'pack' in debug.debug_flags:
            mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
                time.ctime(), collection._upload_transport.base,
                target.random_name,
                target.revision_index.key_count(),
                time.time() - target.start_time)
        self._revision_keys = revision_keys
620
2951.2.2 by Robert Collins
Factor out inventory text copying in Packer to a single helper method.
621
    def _copy_inventory_texts(self):
        """Copy the inventory texts to the new pack.

        self._revision_keys is used to determine what inventories to copy.

        Sets self._text_filter appropriately.
        """
        collection = self._pack_collection
        target = self.new_pack
        # select inventory keys: currently the same keyspace as revisions.
        # Note that querying for keys here could introduce a bug where an
        # inventory item is missed, so do not change it to query separately
        # without cross checking like the text key check in _copy_text_texts.
        inv_keys = self._revision_keys
        inventory_index_map = \
            collection._packs_list_to_pack_map_and_index_list(
                self.packs, 'inventory_index')[0]
        inv_nodes = collection._index_contents(inventory_index_map, inv_keys)
        # copy inventory keys and adjust values
        # XXX: Should be a helper function to allow different inv representation
        # at this point.
        self.pb.update("Copying inventory texts", 2)
        total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
        # Only grab the output lines if we will be processing them
        want_lines = bool(self.revision_ids)
        inv_lines = self._copy_nodes_graph(inventory_index_map,
            target._writer, target.inventory_index,
            readv_group_iter, total_items, output_lines=want_lines)
        if want_lines:
            # NOTE(review): presumably this is where self._text_filter gets
            # set when the pack is limited to some revisions - confirm.
            self._process_inventory_lines(inv_lines)
        else:
            # eat the iterator to cause it to execute.
            for _unused in inv_lines:
                pass
            self._text_filter = None
        if 'pack' in debug.debug_flags:
            mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
                time.ctime(), collection._upload_transport.base,
                target.random_name,
                target.inventory_index.key_count(),
                time.time() - target.start_time)
2951.2.2 by Robert Collins
Factor out inventory text copying in Packer to a single helper method.
658
2951.2.9 by Robert Collins
* ``pack-0.92`` repositories can now be reconciled.
659
    def _copy_text_texts(self):
2592.3.90 by Robert Collins
Slightly broken, but branch and fetch performance is now roughly on par (for bzr.dev) with knits - should be much faster for large repos.
660
        # select text keys
2951.2.9 by Robert Collins
* ``pack-0.92`` repositories can now be reconciled.
661
        text_index_map, text_nodes = self._get_text_nodes()
2951.2.2 by Robert Collins
Factor out inventory text copying in Packer to a single helper method.
662
        if self._text_filter is not None:
2592.3.149 by Robert Collins
Unbreak pack to pack fetching properly, with missing-text detection really working.
663
            # We could return the keys copied as part of the return value from
664
            # _copy_nodes_graph but this doesn't work all that well with the
665
            # need to get line output too, so we check separately, and as we're
666
            # going to buffer everything anyway, we check beforehand, which
667
            # saves reading knit data over the wire when we know there are
668
            # mising records.
669
            text_nodes = set(text_nodes)
670
            present_text_keys = set(_node[1] for _node in text_nodes)
2951.2.2 by Robert Collins
Factor out inventory text copying in Packer to a single helper method.
671
            missing_text_keys = set(self._text_filter) - present_text_keys
2592.3.149 by Robert Collins
Unbreak pack to pack fetching properly, with missing-text detection really working.
672
            if missing_text_keys:
673
                # TODO: raise a specific error that can handle many missing
674
                # keys.
675
                a_missing_key = missing_text_keys.pop()
676
                raise errors.RevisionNotPresent(a_missing_key[1],
677
                    a_missing_key[0])
2592.3.90 by Robert Collins
Slightly broken, but branch and fetch performance is now roughly on par (for bzr.dev) with knits - should be much faster for large repos.
678
        # copy text keys and adjust values
2951.1.2 by Robert Collins
Partial refactoring of pack_repo to create a Packer object for packing.
679
        self.pb.update("Copying content texts", 3)
3070.1.1 by Robert Collins
* ``bzr pack`` now orders revision texts in topological order, with newest
680
        total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)
681
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
682
            self.new_pack.text_index, readv_group_iter, total_items))
2951.2.9 by Robert Collins
* ``pack-0.92`` repositories can now be reconciled.
683
        self._log_copied_texts()
684
3035.2.6 by John Arbash Meinel
Suggested by Robert: Move the missing externals check into part of Packer.pack()
685
    def _check_references(self):
686
        """Make sure our external refereneces are present."""
687
        external_refs = self.new_pack._external_compression_parents_of_texts()
688
        if external_refs:
689
            index = self._pack_collection.text_index.combined_index
690
            found_items = list(index.iter_entries(external_refs))
691
            if len(found_items) != len(external_refs):
692
                found_keys = set(k for idx, k, refs, value in found_items)
693
                missing_items = external_refs - found_keys
694
                missing_file_id, missing_revision_id = missing_items.pop()
695
                raise errors.RevisionNotPresent(missing_revision_id,
696
                                                missing_file_id)
697
2951.2.9 by Robert Collins
* ``pack-0.92`` repositories can now be reconciled.
698
    def _create_pack_from_packs(self):
        """Build the new pack from self.packs.

        Copies revisions, inventories, texts and signatures, in that order,
        then checks external references. The pack is aborted when
        self._use_pack rejects it; otherwise it is finished and allocated in
        the pack collection.

        :return: The new pack, or None if self._use_pack returned false.
        """
        collection = self._pack_collection
        self.pb.update("Opening pack", 0, 5)
        new_pack = self.open_pack()
        self.new_pack = new_pack
        # buffer data - we won't be reading-back during the pack creation and
        # this makes a significant difference on sftp pushes.
        new_pack.set_write_cache_size(1024*1024)
        if 'pack' in debug.debug_flags:
            plain_pack_list = []
            for a_pack in self.packs:
                plain_pack_list.append(
                    '%s%s' % (a_pack.pack_transport.base, a_pack.name))
            if self.revision_ids is None:
                rev_count = 'all'
            else:
                rev_count = len(self.revision_ids)
            mutter('%s: create_pack: creating pack from source packs: '
                '%s%s %s revisions wanted %s t=0',
                time.ctime(), collection._upload_transport.base,
                new_pack.random_name, plain_pack_list, rev_count)
        self._copy_revision_texts()
        self._copy_inventory_texts()
        self._copy_text_texts()
        # select signature keys: same keyspace as the revisions.
        signature_filter = self._revision_keys
        signature_index_map = \
            collection._packs_list_to_pack_map_and_index_list(
                self.packs, 'signature_index')[0]
        signature_nodes = collection._index_contents(
            signature_index_map, signature_filter)
        # copy signature keys and adjust values
        self.pb.update("Copying signature texts", 4)
        self._copy_nodes(signature_nodes, signature_index_map,
            new_pack._writer, new_pack.signature_index)
        if 'pack' in debug.debug_flags:
            mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
                time.ctime(), collection._upload_transport.base,
                new_pack.random_name,
                new_pack.signature_index.key_count(),
                time.time() - new_pack.start_time)
        self._check_references()
        if not self._use_pack(new_pack):
            new_pack.abort()
            return None
        self.pb.update("Finishing pack", 5)
        new_pack.finish()
        collection.allocate(new_pack)
        return new_pack
2592.3.90 by Robert Collins
Slightly broken, but branch and fetch performance is now roughly on par (for bzr.dev) with knits - should be much faster for large repos.
742
2951.1.2 by Robert Collins
Partial refactoring of pack_repo to create a Packer object for packing.
743
    def _copy_nodes(self, nodes, index_map, writer, write_index):
        """Copy knit nodes between packs with no graph references.

        Runs _do_copy_nodes with a nested progress bar, which is finished
        whether or not the copy succeeds.

        :return: Whatever _do_copy_nodes returns.
        """
        progress = ui.ui_factory.nested_progress_bar()
        try:
            outcome = self._do_copy_nodes(nodes, index_map, writer,
                write_index, progress)
        finally:
            progress.finished()
        return outcome
751
752
    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb):
        """Copy the records for nodes into writer and index them.

        :param nodes: An iterable of (index, key, value) tuples. The first
            character of value is kept as a flag and the remainder is parsed
            as 'offset length' within the source pack.
        :param index_map: A mapping from index to the (transport, path) of
            the pack that holds its data.
        :param writer: The pack writer that receives each raw record.
        :param write_index: The index that receives a node for each record.
        :param pb: A progress bar to update as records are copied.
        """
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # plan a readv on each source pack:
        # group by pack
        nodes = sorted(nodes)
        # how to map this into knit.py - or knit.py into this?
        # we don't want the typical knit logic, we want grouping by pack
        # at this point - perhaps a helper library for the following code
        # duplication points?
        request_groups = {}
        for index, key, value in nodes:
            request_groups.setdefault(index, []).append((key, value))
        record_index = 0
        pb.update("Copied record", record_index, len(nodes))
        for index, items in request_groups.iteritems():
            requests = []
            for key, value in items:
                # ---- KnitGraphIndex.get_position
                fields = value[1:].split(' ')
                requests.append(
                    (int(fields[0]), int(fields[1]), (key, value[0])))
            # linear scan up the pack
            requests.sort()
            # copy the data
            transport, path = index_map[index]
            reader = pack.make_readv_reader(transport, path,
                [request[0:2] for request in requests])
            # Records come back in request order, so pair them up.
            for record, request in izip(reader.iter_records(), requests):
                names, read_func = record
                key, eol_flag = request[2]
                raw_data = read_func(None)
                # check the header only
                df, _ = knit._parse_record_header(key, raw_data)
                df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
                pb.update("Copied record", record_index)
                record_index += 1
792
3070.1.1 by Robert Collins
* ``bzr pack`` now orders revision texts in topological order, with newest
793
    def _copy_nodes_graph(self, index_map, writer, write_index,
        readv_group_iter, total_items, output_lines=False):
        """Copy knit nodes between packs.

        This is a generator: nothing is copied until it is iterated. It
        relays whatever _do_copy_nodes_graph yields, under a nested progress
        bar.

        :param output_lines: Return lines present in the copied data as
            an iterator of line,version_id.
        """
        progress = ui.ui_factory.nested_progress_bar()
        try:
            copied = self._do_copy_nodes_graph(index_map, writer,
                write_index, output_lines, progress, readv_group_iter,
                total_items)
            for item in copied:
                yield item
        except Exception:
            # Python 2.4 does not permit try:finally: in a generator.
            progress.finished()
            raise
        else:
            progress.finished()
811
3070.1.1 by Robert Collins
* ``bzr pack`` now orders revision texts in topological order, with newest
812
    def _do_copy_nodes_graph(self, index_map, writer, write_index,
        output_lines, pb, readv_group_iter, total_items):
        """Copy the records named by readv_group_iter into writer.

        Generator doing the work for _copy_nodes_graph.

        :param index_map: Mapping from an index to the (transport, path) the
            records of that index are read from.
        :param writer: Object whose add_bytes_record(raw_data, names) stores
            a record and returns its (pos, size).
        :param write_index: Index receiving one add_node call per record.
        :param output_lines: When true, parse every record completely and
            yield (line, key) for each of its lines; otherwise only the
            record header is parsed and nothing is yielded.
        :param pb: Progress bar updated once per copied record.
        :param readv_group_iter: Iterable of
            (index, readv_vector, node_vector) groups.
        :param total_items: Total reported to the progress bar up front.
        """
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # for line extraction when requested (inventories only)
        if output_lines:
            factory = KnitPlainFactory()
        copied = 0
        pb.update("Copied record", copied, total_items)
        for index, readv_vector, node_vector in readv_group_iter:
            # copy the data
            transport, path = index_map[index]
            reader = pack.make_readv_reader(transport, path, readv_vector)
            records = izip(reader.iter_records(), node_vector)
            for (names, read_func), (key, eol_flag, references) in records:
                raw_data = read_func(None)
                if not output_lines:
                    # check the header only
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                else:
                    # read the entire thing
                    content, _ = knit._parse_record(key[-1], raw_data)
                    # nothing in the last reference list selects the
                    # fulltext reader, anything else the line-delta reader
                    if len(references[-1]) == 0:
                        get_lines = factory.get_fulltext_content
                    else:
                        get_lines = factory.get_linedelta_content
                    for line in get_lines(content):
                        yield line, key
                pos, size = writer.add_bytes_record(raw_data, names)
                value = eol_flag + "%d %d" % (pos, size)
                write_index.add_node(key, value, references)
                pb.update("Copied record", copied)
                copied += 1
845
2951.2.9 by Robert Collins
* ``pack-0.92`` repositories can now be reconciled.
846
    def _get_text_nodes(self):
847
        text_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(
848
            self.packs, 'text_index')[0]
849
        return text_index_map, self._pack_collection._index_contents(text_index_map,
850
            self._text_filter)
851
3070.1.1 by Robert Collins
* ``bzr pack`` now orders revision texts in topological order, with newest
852
    def _least_readv_node_readv(self, nodes):
853
        """Generate request groups for nodes using the least readv's.
854
        
855
        :param nodes: An iterable of graph index nodes.
856
        :return: Total node count and an iterator of the data needed to perform
857
            readvs to obtain the data for nodes. Each item yielded by the
858
            iterator is a tuple with:
859
            index, readv_vector, node_vector. readv_vector is a list ready to
860
            hand to the transport readv method, and node_vector is a list of
861
            (key, eol_flag, references) for the the node retrieved by the
862
            matching readv_vector.
863
        """
864
        # group by pack so we do one readv per pack
865
        nodes = sorted(nodes)
866
        total = len(nodes)
867
        request_groups = {}
868
        for index, key, value, references in nodes:
869
            if index not in request_groups:
870
                request_groups[index] = []
871
            request_groups[index].append((key, value, references))
872
        result = []
873
        for index, items in request_groups.iteritems():
874
            pack_readv_requests = []
875
            for key, value, references in items:
876
                # ---- KnitGraphIndex.get_position
877
                bits = value[1:].split(' ')
878
                offset, length = int(bits[0]), int(bits[1])
879
                pack_readv_requests.append(
880
                    ((offset, length), (key, value[0], references)))
881
            # linear scan up the pack to maximum range combining.
882
            pack_readv_requests.sort()
883
            # split out the readv and the node data.
884
            pack_readv = [readv for readv, node in pack_readv_requests]
885
            node_vector = [node for readv, node in pack_readv_requests]
886
            result.append((index, pack_readv, node_vector))
887
        return total, result
888
2951.2.9 by Robert Collins
* ``pack-0.92`` repositories can now be reconciled.
889
    def _log_copied_texts(self):
        """Mutter how many file texts were copied, if 'pack' debug is on.

        The message names the upload location and random name of the new
        pack, its text index key count, and the time elapsed since the new
        pack's start_time.
        """
        if 'pack' not in debug.debug_flags:
            return
        new_pack = self.new_pack
        mutter('%s: create_pack: file texts copied: %s%s %d items t+%6.3fs',
            time.ctime(), self._pack_collection._upload_transport.base,
            new_pack.random_name,
            new_pack.text_index.key_count(),
            time.time() - new_pack.start_time)
896
897
    def _process_inventory_lines(self, inv_lines):
898
        """Use up the inv_lines generator and setup a text key filter."""
899
        repo = self._pack_collection.repo
900
        fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
901
            inv_lines, self.revision_keys)
2951.2.9 by Robert Collins
* ``pack-0.92`` repositories can now be reconciled.
902
        text_filter = []
903
        for fileid, file_revids in fileid_revisions.iteritems():
904
            text_filter.extend([(fileid, file_revid) for file_revid in file_revids])
905
        self._text_filter = text_filter
906
3070.1.2 by John Arbash Meinel
Cleanup OptimizingPacker code according to my review feedback
907
    def _revision_node_readv(self, revision_nodes):
3070.1.1 by Robert Collins
* ``bzr pack`` now orders revision texts in topological order, with newest
908
        """Return the total revisions and the readv's to issue.
909
910
        :param revision_nodes: The revision index contents for the packs being
911
            incorporated into the new pack.
912
        :return: As per _least_readv_node_readv.
913
        """
914
        return self._least_readv_node_readv(revision_nodes)
915
2951.2.8 by Robert Collins
Test that reconciling a repository can be done twice in a row.
916
    def _use_pack(self, new_pack):
917
        """Return True if new_pack should be used.
918
919
        :param new_pack: The pack that has just been created.
920
        :return: True if the pack should be used.
921
        """
922
        return new_pack.data_inserted()
923
2951.1.2 by Robert Collins
Partial refactoring of pack_repo to create a Packer object for packing.
924
3070.1.1 by Robert Collins
* ``bzr pack`` now orders revision texts in topological order, with newest
925
class OptimisingPacker(Packer):
    """A packer which spends more time to create better disk layouts."""

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        The requests are emitted in the reverse of the topological sort of
        the revision graph, so that ancestors come after their children.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        # one pass to collect both the graph and the per-key node details
        parent_map = {}
        details = {}
        for index, key, value, references in revision_nodes:
            parent_map[key] = references[0]
            details[key] = (index, value, references)
        sorted_keys = tsort.topo_sort(parent_map)
        total = len(sorted_keys)
        # Single IO is pathological, but it will work as a starting point.
        requests = []
        for key in reversed(sorted_keys):
            index, value, references = details[key]
            # ---- KnitGraphIndex.get_position
            fields = value[1:].split(' ')
            offset = int(fields[0])
            length = int(fields[1])
            readv_vector = [(offset, length)]
            node_vector = [(key, value[0], references)]
            requests.append((index, readv_vector, node_vector))
        # TODO: combine requests in the same index that are in ascending order.
        return total, requests
957
958
2951.1.3 by Robert Collins
Partial support for native reconcile with packs.
959
class ReconcilePacker(Packer):
    """A packer which regenerates indices etc as it copies.

    This is used by ``bzr reconcile`` to cause parent text pointers to be
    regenerated.
    """

    def _extra_init(self):
        """Start out assuming reconcile has not changed any data."""
        self._data_changed = False

    def _process_inventory_lines(self, inv_lines):
        """Generate a text key reference map to reconcile with.

        :param inv_lines: Inventory lines, handed to the repository's
            _find_text_key_references_from_xml_inventory_lines.
        :return: None. The references are stored in self._text_refs and
            self._text_filter is reset to None.
        """
        repo = self._pack_collection.repo
        refs = repo._find_text_key_references_from_xml_inventory_lines(
            inv_lines)
        self._text_refs = refs
        # during reconcile we:
        #  - convert unreferenced texts to full texts
        #  - correct texts which reference a text not copied to be full texts
        #  - copy all others as-is but with corrected parents.
        #  - so at this point we don't know enough to decide what becomes a full
        #    text.
        self._text_filter = None

    def _copy_text_texts(self):
        """Generate what texts we should have and then copy.

        Texts whose recorded parents already match the ideal index (or differ
        only after the left most parent) are bulk copied; texts missing from
        the ideal index are not copied; everything else is reinserted one
        text at a time in topological order.

        :raises errors.BzrError: If a text to reinsert has a parent with a
            different file id, or if the new pack ends up referring to
            compression parents it does not contain.
        """
        self.pb.update("Copying content texts", 3)
        # the work below happens in five numbered steps:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
            _1, key, _2, refs in
            self.new_pack.revision_index.iter_all_entries()])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
        # 2) generate a text_nodes list that contains all the deltas that can
        #    be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # 0 - index
            # 1 - key
            # 2 - value
            # 3 - refs
            try:
                ideal_parents = tuple(ideal_index[node[1]])
            except KeyError:
                # not in the ideal index: the node is dropped, which is a
                # change in itself.
                self._data_changed = True
            else:
                if ideal_parents == (NULL_REVISION,):
                    ideal_parents = ()
                if ideal_parents == node[3][0]:
                    # no change needed.
                    ok_nodes.append(node)
                elif ideal_parents[0:1] == node[3][0][0:1]:
                    # the left most parent is the same, or there are no parents
                    # today. Either way, we can preserve the representation as
                    # long as we change the refs to be inserted.
                    self._data_changed = True
                    ok_nodes.append((node[0], node[1], node[2],
                        (ideal_parents, node[3][1])))
                else:
                    # Reinsert this text completely
                    bad_texts.append((node[1], ideal_parents))
                    self._data_changed = True
        # we're finished with some data.
        del ideal_index
        del text_nodes
        # 3) bulk copy the ok data
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        # 4) adhoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail to
        # reconcile when parts of a single delta chain are preserved intact,
        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
        # each bad text is (text key, ideal parents); order by the position
        # of the text key's second element in the revision sort.
        bad_texts.sort(key=lambda item: rev_order[item[0][1]])
        # NOTE(review): transaction and file_id_index are not referenced
        # again in this method; kept in case creating them matters - confirm.
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = _DirectPackAccess(
                {self.new_pack.text_index:self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
            self.new_pack.access_tuple())
        output_texts = KnitVersionedFiles(
            _KnitGraphIndex(self.new_pack.text_index,
                add_callback=self.new_pack.text_index.add_nodes,
                deltas=True, parents=True, is_locked=repo.is_locked),
            data_access=data_access, max_delta_chain=200)
        for key, parent_keys in bad_texts:
            # We refer to the new pack to delta data being output.
            # A possible improvement would be to catch errors on short reads
            # and only flush then.
            self.new_pack.flush()
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                        (key, parent_keys))
            text_lines = split_lines(repo.texts.get_record_stream(
                [key], 'unordered', True).next().get_bytes_as('fulltext'))
            output_texts.add_lines(key, parent_keys, text_lines,
                random_id=True, check_content=False)
        # 5) check that nothing inserted has a reference outside the keyspace.
        missing_text_keys = self.new_pack._external_compression_parents_of_texts()
        if missing_text_keys:
            raise errors.BzrError('Reference to missing compression parents %r'
                % (missing_text_keys,))
        self._log_copied_texts()

    def _use_pack(self, new_pack):
        """Override _use_pack to check for reconcile having changed content.

        :param new_pack: The pack that has just been created.
        :return: A true value only if new_pack had data inserted and either
            the copy changed something or the set of inventory keys in
            new_pack differs from the collection's combined inventory index.
        """
        # XXX: we might be better checking this at the copy time.
        inv_index = self._pack_collection.inventory_index.combined_index
        original_inventory_keys = set(
            [entry[1] for entry in inv_index.iter_all_entries()])
        new_inventory_keys = set(
            [entry[1] for entry in new_pack.inventory_index.iter_all_entries()])
        if new_inventory_keys != original_inventory_keys:
            self._data_changed = True
        return new_pack.data_inserted() and self._data_changed
1099
2951.1.2 by Robert Collins
Partial refactoring of pack_repo to create a Packer object for packing.
1100
1101
class RepositoryPackCollection(object):
3517.4.4 by Martin Pool
Document RepositoryPackCollection._names
1102
    """Management of packs within a repository.
1103
    
1104
    :ivar _names: map of {pack_name: (index_size,)}
1105
    """
2951.1.2 by Robert Collins
Partial refactoring of pack_repo to create a Packer object for packing.
1106
1107
    def __init__(self, repo, transport, index_transport, upload_transport,
                 pack_transport):
        """Create a new RepositoryPackCollection with no packs in memory.

        :param repo: Stored as self.repo.
        :param transport: Addresses the repository base directory
            (typically .bzr/repository/).
        :param index_transport: Addresses the directory containing indices.
        :param upload_transport: Addresses the directory into which packs are
            written while they're being created.
        :param pack_transport: Addresses the directory of existing complete
            packs.
        """
        self.repo = repo
        self.transport = transport
        self._index_transport = index_transport
        self._upload_transport = upload_transport
        self._pack_transport = pack_transport
        # each index file suffix gets its position in this sequence
        self._suffix_offsets = dict(
            (suffix, offset) for offset, suffix in
            enumerate(('.rix', '.iix', '.tix', '.six')))
        self.packs = []
        # name:Pack mapping
        self._packs_by_name = {}
        # the previous pack-names content
        self._packs_at_load = None
        # when a pack is being created by this object, the state of that pack.
        self._new_pack = None
        # aggregated index data, one aggregate per kind of index
        for index_name in ('revision_index', 'inventory_index', 'text_index',
            'signature_index'):
            setattr(self, index_name, AggregateIndex())
1136
1137
    def add_pack_to_memory(self, pack):
        """Make a Pack object available to the repository to satisfy queries.

        The pack is appended to self.packs, recorded by name, and each of its
        four indices is added to the matching aggregate index.

        :param pack: A Pack object.
        :raises AssertionError: If a pack with the same name has already been
            added; nothing is modified in that case.
        """
        if pack.name in self._packs_by_name:
            # say which pack collided so the failure can be diagnosed
            raise AssertionError(
                'pack %s already in _packs_by_name' % (pack.name,))
        self.packs.append(pack)
        self._packs_by_name[pack.name] = pack
        self.revision_index.add_index(pack.revision_index, pack)
        self.inventory_index.add_index(pack.inventory_index, pack)
        self.text_index.add_index(pack.text_index, pack)
        self.signature_index.add_index(pack.signature_index, pack)
1150
        
1151
    def all_packs(self):
        """Return a list of all the Pack objects this repository has.

        One pack is looked up with get_pack_by_name for every name reported
        by self.names(), in that order.

        Note that an in-progress pack being created is not returned.

        :return: A list of Pack objects for all the packs in the repository.
        """
        return [self.get_pack_by_name(name) for name in self.names()]
1162
1163
    def autopack(self):
        """Pack the pack collection incrementally.

        This will not attempt global reorganisation or recompression,
        rather it will just ensure that the total number of packs does
        not grow without bound. It uses the _max_pack_count method to
        determine if autopacking is needed, and the pack_distribution
        method to determine the number of revisions in each pack.

        If autopacking takes place then the packs name collection will have
        been flushed to disk - packing requires updating the name collection
        in synchronisation with certain steps. Otherwise the names collection
        is not flushed.

        :return: True if packing took place.
        """
        # XXX: Should not be needed when the management of indices is sane.
        total_revisions = self.revision_index.combined_index.key_count()
        total_packs = len(self._names)
        if self._max_pack_count(total_revisions) >= total_packs:
            # already at or under the allowed number of packs
            return False
        # XXX: the following may want to be a class, to pack with a given
        # policy.
        mutter('Auto-packing repository %s, which has %d pack files, '
            'containing %d revisions into %d packs.', self, total_packs,
            total_revisions, self._max_pack_count(total_revisions))
        # determine which packs need changing
        pack_distribution = self.pack_distribution(total_revisions)
        # Packs without revisions are left out of the plan: revision less
        # packs are not generated by normal operation, only by operations
        # like sign-my-commits, and thus will not tend to grow rapidly or
        # without bound like commit containing packs do - leave them alone as
        # packing them really should group their data with the relevant
        # commit, and that may involve rewriting ancient history - which
        # autopack tries to avoid. Alternatively we could not group the data
        # but treat each of these as having a single revision, and thus add
        # one revision for each to the total revision count, to get a
        # matching distribution.
        existing_packs = []
        for pack in self.all_packs():
            revision_count = pack.get_revision_count()
            if revision_count != 0:
                existing_packs.append((revision_count, pack))
        pack_operations = self.plan_autopack_combinations(
            existing_packs, pack_distribution)
        self._execute_pack_operations(pack_operations)
        return True
1211
3070.1.1 by Robert Collins
* ``bzr pack`` now orders revision texts in topological order, with newest
1212
    def _execute_pack_operations(self, pack_operations, _packer_class=Packer):
        """Execute a series of pack operations.

        Every non-empty operation is packed and its source packs are removed
        from memory; the pack names are then saved once, and only after that
        are the source packs of all operations obsoleted.

        :param pack_operations: A list of [revision_count, packs_to_combine].
        :param _packer_class: The class of packer to use (default: Packer).
        :return: None.
        """
        for _count, packs in pack_operations:
            # we may have no-ops from the setup logic
            if len(packs) != 0:
                packer = _packer_class(self, packs, '.autopack')
                packer.pack()
                for pack in packs:
                    self._remove_pack_from_memory(pack)
        # record the newly available packs and stop advertising the old
        # packs
        self._save_pack_names(clear_obsolete_packs=True)
        # Move the old packs out of the way now they are no longer referenced.
        for _count, packs in pack_operations:
            self._obsolete_packs(packs)
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
1232
2592.3.188 by Robert Collins
Allow pack repositories to have multiple writers active at one time, for greater concurrency.
1233
    def lock_names(self):
        """Acquire the mutex around the pack-names index.

        The mutex is the write lock of the repository's control files.

        This cannot be used in the middle of a read-only transaction on the
        repository.
        """
        control_files = self.repo.control_files
        control_files.lock_write()
1240
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
1241
    def pack(self):
        """Pack the pack collection totally.

        All packs are combined by a single operation run with
        OptimisingPacker. Nothing is done when there are fewer than two
        packs.

        :return: None.
        """
        self.ensure_loaded()
        total_packs = len(self._names)
        if total_packs < 2:
            # This is arguably wrong because we might not be optimal, but for
            # now lets leave it in. (e.g. reconcile -> one pack. But not
            # optimal.)
            return
        total_revisions = self.revision_index.combined_index.key_count()
        # XXX: the following may want to be a class, to pack with a given
        # policy.
        mutter('Packing repository %s, which has %d pack files, '
            'containing %d revisions into 1 packs.', self, total_packs,
            total_revisions)
        # every pack goes into the one operation
        revision_count = 0
        packs = []
        for pack in self.all_packs():
            revision_count += pack.get_revision_count()
            packs.append(pack)
        pack_operations = [[revision_count, packs]]
        self._execute_pack_operations(pack_operations, OptimisingPacker)
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
1263
1264
    def plan_autopack_combinations(self, existing_packs, pack_distribution):
        """Work out which existing packs should be combined together.

        Both arguments are consumed while planning: existing_packs is sorted
        and emptied, and buckets are adjusted in, or removed from,
        pack_distribution as packs are matched against them.

        :param existing_packs: The packs to pack. (A list of (revcount, Pack)
            tuples).
        :param pack_distribution: A list with the number of revisions desired
            in each pack.
        :return: A list of [revision_count, [Pack, ...]] operations, or an
            empty list when there are no more packs than buckets.
        """
        if len(existing_packs) <= len(pack_distribution):
            return []
        # Work through the packs from the largest to the smallest.
        existing_packs.sort(reverse=True)
        current_operation = [0, []]
        planned = [current_operation]
        while existing_packs:
            revision_count, candidate = existing_packs.pop(0)
            if revision_count < pack_distribution[0]:
                # Smaller than the bucket at the head of the distribution:
                # its contents go into the output pack being assembled.
                current_operation[0] += revision_count
                current_operation[1].append(candidate)
                if current_operation[0] >= pack_distribution[0]:
                    # That output pack is full; move on to the next bucket
                    # and start assembling a new operation.
                    del pack_distribution[0]
                    current_operation = [0, []]
                    planned.append(current_operation)
                continue
            # The pack is already at least as large as the bucket, so it is
            # left alone and its size is deducted from the distribution.
            remaining = revision_count
            while remaining > 0:
                remaining -= pack_distribution[0]
                if remaining < 0:
                    # Only part of this bucket was consumed.
                    pack_distribution[0] = -remaining
                else:
                    # The whole bucket was consumed.
                    del pack_distribution[0]
        return planned
1305
1306
    def ensure_loaded(self):
        """Make sure the list of pack names has been read from disk.

        When no names are held in memory yet, the pack-names index is read
        and both the name -> index sizes map and the set of nodes seen at
        load time are filled in. all_packs() is then called in every case to
        populate the pack metadata.

        :raises errors.ObjectNotLocked: if the repository is not locked.
        """
        # Accessing the names list is only permitted on a locked repository.
        if not self.repo.is_locked():
            raise errors.ObjectNotLocked(self.repo)
        if self._names is None:
            loaded_names = {}
            nodes_at_load = set()
            self._names = loaded_names
            self._packs_at_load = nodes_at_load
            for _index, key, value in self._iter_disk_pack_index():
                loaded_names[key[0]] = self._parse_index_sizes(value)
                nodes_at_load.add((key, value))
        # populate all the metadata.
        self.all_packs()
1320
1321
    def _parse_index_sizes(self, value):
1322
        """Parse a string of index sizes."""
1323
        return tuple([int(digits) for digits in value.split(' ')])
2592.3.118 by Robert Collins
Record the size of the index files in the pack-names index.
1324
2592.3.176 by Robert Collins
Various pack refactorings.
1325
    def get_pack_by_name(self, name):
        """Return the Pack object for a pack name, creating it if needed.

        Packs already held in self._packs_by_name are returned directly.
        Otherwise an ExistingPack is built from the revision, inventory,
        text and signature indices for the name and handed to
        add_pack_to_memory before being returned.

        :param name: The name of the pack - e.g. '123456'
        :return: A Pack object.
        """
        try:
            return self._packs_by_name[name]
        except KeyError:
            # Not constructed yet - build it below.
            pass
        indices = [self._make_index(name, suffix)
            for suffix in ('.rix', '.iix', '.tix', '.six')]
        pack = ExistingPack(self._pack_transport, name, *indices)
        self.add_pack_to_memory(pack)
        return pack
1342
2592.3.201 by Robert Collins
Cleanup RepositoryPackCollection.allocate.
1343
    def allocate(self, a_new_pack):
        """Record a new pack in the in-memory list of packs.

        :param a_new_pack: A NewPack instance to be added to the collection of
            packs for this repository.
        :raises errors.BzrError: if a pack of the same name is already
            present in the names list.
        """
        self.ensure_loaded()
        pack_name = a_new_pack.name
        if pack_name in self._names:
            raise errors.BzrError(
                'Pack %r already exists in %s' % (pack_name, self))
        self._names[pack_name] = tuple(a_new_pack.index_sizes)
        self.add_pack_to_memory(a_new_pack)
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
1355
2592.3.188 by Robert Collins
Allow pack repositories to have multiple writers active at one time, for greater concurrency.
1356
    def _iter_disk_pack_index(self):
        """Iterate over every entry of the on-disk pack-names index.

        This is used when loading the list from disk, and before writing to
        detect updates from others during our write operation.

        :return: An iterator of the index contents.
        """
        names_index = GraphIndex(self.transport, 'pack-names', None)
        return names_index.iter_all_entries()
1365
2592.3.176 by Robert Collins
Various pack refactorings.
1366
    def _make_index(self, name, suffix):
        """Create a GraphIndex for one of the index files of a pack.

        :param name: The name of the pack.
        :param suffix: The index suffix; it selects, through
            self._suffix_offsets, which of the sizes recorded for the pack in
            self._names is the size of this index.
        :return: A GraphIndex on the index transport.
        """
        offset = self._suffix_offsets[suffix]
        file_name = name + suffix
        recorded_size = self._names[name][offset]
        return GraphIndex(self._index_transport, file_name, recorded_size)
2592.5.5 by Martin Pool
Make RepositoryPackCollection remember the index transport, and responsible for getting a map of indexes
1372
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
1373
    def _max_pack_count(self, total_revisions):
1374
        """Return the maximum number of packs to use for total revisions.
1375
        
1376
        :param total_revisions: The total number of revisions in the
1377
            repository.
1378
        """
1379
        if not total_revisions:
1380
            return 1
1381
        digits = str(total_revisions)
1382
        result = 0
1383
        for digit in digits:
1384
            result += int(digit)
1385
        return result
1386
1387
    def names(self):
        """Return the known pack names as a sorted list."""
        ordered = list(self._names.keys())
        ordered.sort()
        return ordered
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
1390
2592.3.187 by Robert Collins
Finish cleaning up the packing logic to take Pack objects - all tests pass.
1391
    def _obsolete_packs(self, packs):
        """Move obsoleted packs and their indices into obsolete_packs.

        Each pack file is renamed on its own pack transport, and its four
        index files are renamed on the index transport, to the same file name
        under '../obsolete_packs/'.

        Note: for correctness this function should only be called after a new
        pack names index has been written without these pack names, and with
        the names of packs that contain the data previously available via these
        packs.

        :param packs: The packs to obsolete.
        :return: None.
        """
        obsolete_prefix = '../obsolete_packs/'
        for pack in packs:
            pack_file = pack.file_name()
            pack.pack_transport.rename(pack_file, obsolete_prefix + pack_file)
            # TODO: Probably needs to know all possible indices for this pack
            # - or maybe list the directory and move all indices matching this
            # name whether we recognize it or not?
            for suffix in ('.iix', '.six', '.tix', '.rix'):
                index_file = pack.name + suffix
                self._index_transport.rename(index_file,
                    obsolete_prefix + index_file)
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
1413
1414
    def pack_distribution(self, total_revisions):
        """Generate a list of the number of revisions to put in each pack.

        Each decimal digit d at position 10**k of total_revisions contributes
        d packs of 10**k revisions; the sizes are listed largest first. For
        example 123 gives [100, 10, 10, 1, 1, 1].

        :param total_revisions: The total number of revisions in the
            repository.
        :return: A list of pack sizes, or [0] when there are no revisions.
        """
        if total_revisions == 0:
            return [0]
        decimal = str(total_revisions)
        sizes = []
        # Walk the digits from the most significant end so that the sizes
        # come out already ordered from largest to smallest.
        magnitude = 10 ** (len(decimal) - 1)
        for digit in decimal:
            sizes.extend([magnitude] * int(digit))
            magnitude //= 10
        return sizes
1429
2592.5.12 by Martin Pool
Move pack_transport and pack_name onto RepositoryPackCollection
1430
    def _pack_tuple(self, name):
1431
        """Return a tuple with the transport and file name for a pack name."""
1432
        return self._pack_transport, name + '.pack'
1433
2592.3.236 by Martin Pool
Make RepositoryPackCollection.remove_pack_from_memory private
1434
    def _remove_pack_from_memory(self, pack):
        """Forget a pack: drop its name, its object and its indices.

        Only affects memory state, until self._save_pack_names() is invoked.

        :param pack: The pack to remove; it must currently be known by name.
        """
        pack_name = pack.name
        del self._names[pack_name]
        del self._packs_by_name[pack_name]
        self._remove_pack_indices(pack)
1442
1443
    def _remove_pack_indices(self, pack):
        """Detach each of the pack's indices from the aggregated indices.

        The revision, inventory, text and signature indices are removed, in
        that order, from the aggregate of the same name on this collection.
        """
        for attribute in ('revision_index', 'inventory_index', 'text_index',
            'signature_index'):
            aggregate = getattr(self, attribute)
            aggregate.remove_index(getattr(pack, attribute), pack)
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
1449
1450
    def reset(self):
        """Throw away every piece of cached state.

        The cached knits on the repository are dropped, the aggregated
        indices are cleared, any open pack is forgotten and the pack names
        are marked as not loaded.
        """
        repo = self.repo
        # revisions
        repo._revision_knit = None
        self.revision_index.clear()
        # signatures
        repo._signature_knit = None
        self.signature_index.clear()
        # file texts
        self.text_index.clear()
        repo._text_knit = None
        # inventories
        self.inventory_index.clear()
        # the pack being written, if any
        self._new_pack = None
        # what we know about the packs on disk
        self._names = None
        self.packs = []
        self._packs_by_name = {}
        self._packs_at_load = None
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
1470
2592.3.207 by Robert Collins
Start removing the dependency on RepositoryPackCollection._make_index_map.
1471
    def _make_index_map(self, index_suffix):
        """Return information on existing indices.

        :param index_suffix: Index suffix added to pack name; one of '.rix',
            '.six', '.iix' or '.tix'.

        :returns: (pack_map, indices) where indices is a list of GraphIndex
        objects, and pack_map is a mapping from those objects to the
        pack tuple they describe.
        """
        # TODO: stop using this; it creates new indices unnecessarily.
        self.ensure_loaded()
        attribute_for_suffix = {
            '.rix': 'revision_index',
            '.six': 'signature_index',
            '.iix': 'inventory_index',
            '.tix': 'text_index',
            }
        packs = self.all_packs()
        return self._packs_list_to_pack_map_and_index_list(
            packs, attribute_for_suffix[index_suffix])
2592.5.15 by Martin Pool
Split out common code for making index maps
1489
2592.3.179 by Robert Collins
Generate the revision_index_map for packing during the core operation, from the pack objects.
1490
    def _packs_list_to_pack_map_and_index_list(self, packs, index_attribute):
1491
        """Convert a list of packs to an index pack map and index list.
1492
1493
        :param packs: The packs list to process.
1494
        :param index_attribute: The attribute that the desired index is found
1495
            on.
1496
        :return: A tuple (map, list) where map contains the dict from
1497
            index:pack_tuple, and lsit contains the indices in the same order
1498
            as the packs list.
1499
        """
1500
        indices = []
1501
        pack_map = {}
1502
        for pack in packs:
1503
            index = getattr(pack, index_attribute)
1504
            indices.append(index)
2592.3.200 by Robert Collins
Make NewPack reopen the index files, separating out the task of refreshing the index maps in the repository and managing the completion of writing a single pack to disk.
1505
            pack_map[index] = (pack.pack_transport, pack.file_name())
2592.3.179 by Robert Collins
Generate the revision_index_map for packing during the core operation, from the pack objects.
1506
        return pack_map, indices
1507
2592.3.93 by Robert Collins
Steps toward filtering revisions/inventories/texts during fetch.
1508
    def _index_contents(self, pack_map, key_filter=None):
        """Iterate the entries of all the indices in a pack_map.

        The keys of pack_map are combined into one CombinedGraphIndex which
        is then queried.

        :param pack_map: A map from indices to pack details.
        :param key_filter: An optional filter to limit the
            keys returned.
        :return: An iterable of index entries: all of them when key_filter
            is None, otherwise those selected by key_filter.
        """
        combined = CombinedGraphIndex(list(pack_map.keys()))
        if key_filter is not None:
            return combined.iter_entries(key_filter)
        return combined.iter_all_entries()
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
1521
2592.3.237 by Martin Pool
Rename RepositoryPackCollection.release_names to _unlock_names
1522
    def _unlock_names(self):
        """Release the mutex around the pack-names index.

        The mutex is the repository's control_files lock.
        """
        control_files = self.repo.control_files
        control_files.unlock()
1525
2948.1.1 by Robert Collins
* Obsolete packs are now cleaned up by pack and autopack operations.
1526
    def _save_pack_names(self, clear_obsolete_packs=False):
        """Write the list of packs to the pack-names index.

        The mutex around the pack names list is held while writing. The
        packs added and removed in memory since the names were loaded are
        applied on top of what is currently on disk, so that concurrent
        updates made by others are kept. Once the mutex is released the
        in-memory packs are synchronised with what was written.

        :param clear_obsolete_packs: If True, clear out the contents of the
            obsolete_packs directory.
        """
        self.lock_names()
        try:
            builder = GraphIndexBuilder()
            # What is on disk right now.
            disk_nodes = set((key, value)
                for _index, key, value in self._iter_disk_pack_index())
            # What we hold in memory, expressed in the same (key, value) form.
            memory_nodes = set(
                ((name, ), ' '.join(str(size) for size in sizes))
                for name, sizes in self._names.items())
            # Diff memory against what we originally loaded, and replay our
            # own removals and additions onto the disk content.
            baseline = self._packs_at_load
            removed_by_us = baseline - memory_nodes
            added_by_us = memory_nodes - baseline
            disk_nodes.difference_update(removed_by_us)
            disk_nodes.update(added_by_us)
            # TODO: handle same-name, index-size-changes here -
            # e.g. use the value from disk, not ours, *unless* we're the one
            # changing it.
            for key, value in disk_nodes:
                builder.add_node(key, value)
            self.transport.put_file('pack-names', builder.finish(),
                mode=self.repo.bzrdir._get_file_mode())
            # move the baseline forward
            self._packs_at_load = disk_nodes
            if clear_obsolete_packs:
                self._clear_obsolete_packs()
        finally:
            self._unlock_names()
        # synchronise the memory packs list with what we just wrote:
        written = dict(disk_nodes)
        # drop no longer present nodes
        for pack in self.all_packs():
            if (pack.name,) not in written:
                self._remove_pack_from_memory(pack)
        # add new nodes/refresh existing ones
        for key, value in disk_nodes:
            name = key[0]
            sizes = self._parse_index_sizes(value)
            if name in self._names:
                if sizes == self._names[name]:
                    # Known and unchanged: nothing to refresh.
                    continue
                # the pack for name has had its indices replaced - rare but
                # important to handle. XXX: probably can never happen today
                # because the merge code above does not handle it - you may
                # end up adding the same key twice to the new disk index
                # because the set values are the same, unless the only index
                # shows up as deleted by the set difference - which it may.
                # Until there is a specific test for this, assume its
                # broken. RBC 20071017.
                self._remove_pack_from_memory(self.get_pack_by_name(name))
            # New, or replaced: record the sizes and (re)create the pack.
            self._names[name] = sizes
            self.get_pack_by_name(name)
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
1594
3446.2.1 by Martin Pool
Failure to delete an obsolete pack file should not be fatal.
1595
    def _clear_obsolete_packs(self):
1596
        """Delete everything from the obsolete-packs directory.
1597
        """
1598
        obsolete_pack_transport = self.transport.clone('obsolete_packs')
1599
        for filename in obsolete_pack_transport.list_dir('.'):
1600
            try:
1601
                obsolete_pack_transport.delete(filename)
1602
            except (errors.PathError, errors.TransportError), e:
1603
                warning("couldn't delete obsolete pack, skipping it:\n%s" % (e,))
1604
2592.3.202 by Robert Collins
Move write stream management into NewPack.
1605
    def _start_write_group(self):
        """Open a new pack and direct index additions to it.

        A NewPack is created and its four indices are added as the writable
        index of the corresponding aggregated index. The add callbacks of the
        repository's inventories, revisions, signatures and texts indices are
        then pointed at the aggregated indices.

        :raises errors.NotWriteLocked: if the repository is not write locked.
        """
        # Do not permit preparation for writing if we're not in a 'write lock'.
        if not self.repo.is_write_locked():
            raise errors.NotWriteLocked(self)
        new_pack = NewPack(self._upload_transport, self._index_transport,
            self._pack_transport, upload_suffix='.pack',
            file_mode=self.repo.bzrdir._get_file_mode())
        self._new_pack = new_pack
        # allow writing: queue writes to a new index
        for attribute in ('revision_index', 'inventory_index', 'text_index',
            'signature_index'):
            aggregate = getattr(self, attribute)
            aggregate.add_writable_index(getattr(new_pack, attribute),
                new_pack)

        repo = self.repo
        repo.inventories._index._add_callback = self.inventory_index.add_callback
        repo.revisions._index._add_callback = self.revision_index.add_callback
        repo.signatures._index._add_callback = self.signature_index.add_callback
        repo.texts._index._add_callback = self.text_index.add_callback
2592.5.9 by Martin Pool
Move some more bits that seem to belong in RepositoryPackCollection into there
1626
2592.5.8 by Martin Pool
Delegate abort_write_group to RepositoryPackCollection
1627
    def _abort_write_group(self):
1628
        # FIXME: just drop the transient index.
1629
        # forget what names there are
3163.1.2 by Martin Pool
RepositoryPackCollection._abort_write_group should check it actually has a new pack before aborting (#180208)
1630
        if self._new_pack is not None:
1631
            self._new_pack.abort()
1632
            self._remove_pack_indices(self._new_pack)
1633
            self._new_pack = None
2592.3.213 by Robert Collins
Retain packs and indices in memory within a lock, even when write groups are entered and exited.
1634
        self.repo._text_knit = None
2592.5.6 by Martin Pool
Move pack repository start_write_group to pack collection object
1635
2592.5.7 by Martin Pool
move commit_write_group to RepositoryPackCollection
1636
    def _commit_write_group(self):
        """Finish the current write group.

        The new pack's indices are detached from the aggregated indices.
        If data was inserted the pack is finished and allocated in the names
        list, and autopack is run; when autopack takes no steps the names
        list is saved here. A pack with no data is aborted instead.
        """
        new_pack = self._new_pack
        self._remove_pack_indices(new_pack)
        if not new_pack.data_inserted():
            # Nothing was written: discard the empty pack.
            new_pack.abort()
            self._new_pack = None
        else:
            # get all the data to disk and read to use
            new_pack.finish()
            self.allocate(new_pack)
            self._new_pack = None
            if not self.autopack():
                # when autopack takes no steps, the names list is still
                # unsaved.
                self._save_pack_names()
        self.repo._text_knit = None
2592.5.8 by Martin Pool
Delegate abort_write_group to RepositoryPackCollection
1651
1652
2592.3.224 by Martin Pool
Rename GraphKnitRepository etc to KnitPackRepository
1653
class KnitPackRepository(KnitRepository):
    """Repository with knit objects stored inside pack containers.

    The layering for a KnitPackRepository is:

    Graph        |  HPSS    | Repository public layer |
    ===================================================
    Tuple based apis below, string based, and key based apis above
    ---------------------------------------------------
    KnitVersionedFiles
      Provides .texts, .revisions etc
      This adapts the N-tuple keys to physical knit records which only have a
      single string identifier (for historical reasons), which in older formats
      was always the revision_id, and in the mapped code for packs is always
      the last element of key tuples.
    ---------------------------------------------------
    GraphIndex
      A separate GraphIndex is used for each of the
      texts/inventories/revisions/signatures contained within each individual
      pack file. The GraphIndex layer works in N-tuples and is unaware of any
      semantic value.
    ===================================================

    """

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        """Create the repository object and its pack-backed versioned files.

        All arguments are passed straight to ``KnitRepository.__init__``.
        A RepositoryPackCollection is then built over the 'indices', 'upload'
        and 'packs' clones of the repository transport, and the
        inventories/revisions/signatures/texts stores are wired to the
        matching aggregate index of that collection.
        """
        KnitRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        index_transport = self._transport.clone('indices')
        self._pack_collection = RepositoryPackCollection(self, self._transport,
            index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'))
        self.inventories = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.inventory_index.data_access,
            max_delta_chain=200)
        self.revisions = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                deltas=False, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.revision_index.data_access,
            max_delta_chain=0)
        self.signatures = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                deltas=False, parents=False, is_locked=self.is_locked),
            data_access=self._pack_collection.signature_index.data_access,
            max_delta_chain=0)
        self.texts = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.text_index.data_access,
            max_delta_chain=200)
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False
        self._fetch_order = 'unsorted'

    def _warn_if_deprecated(self):
        """Do nothing: overrides the inherited hook to emit no warning."""
        # This class isn't deprecated
        pass

    def _abort_write_group(self):
        """Delegate aborting the write group to the pack collection."""
        self._pack_collection._abort_write_group()

    def _find_inconsistent_revision_parents(self):
        """Find revisions with incorrectly cached parents.

        The parents recorded in the revision index are compared, in batches
        of 1000 revisions, with the ``parent_ids`` of the revision objects
        themselves.

        :returns: a list of tuples of (revision-id, parents-in-index,
            parents-in-revision), one per mismatching revision.
        :raises ObjectNotLocked: if the repository is not locked.
        """
        if not self.is_locked():
            raise errors.ObjectNotLocked(self)
        pb = ui.ui_factory.nested_progress_bar()
        # Results are accumulated in a list (rather than yielded) so that the
        # progress bar can be finished from a plain try/finally.
        result = []
        try:
            revision_nodes = self._pack_collection.revision_index \
                .combined_index.iter_all_entries()
            index_positions = []
            # Get the cached index values for all revisions, and also the location
            # in each index of the revision text so we can perform linear IO.
            for index, key, value, refs in revision_nodes:
                pos, length = value[1:].split(' ')
                index_positions.append((index, int(pos), key[0],
                    tuple(parent[0] for parent in refs[0])))
                pb.update("Reading revision index.", 0, 0)
            index_positions.sort()
            # Explicit floor division: the batch count must be an integer.
            batch_count = len(index_positions) // 1000 + 1
            pb.update("Checking cached revision graph.", 0, batch_count)
            for offset in xrange(batch_count):
                pb.update("Checking cached revision graph.", offset)
                to_query = index_positions[offset * 1000:(offset + 1) * 1000]
                if not to_query:
                    break
                rev_ids = [item[2] for item in to_query]
                revs = self.get_revisions(rev_ids)
                for revision, item in zip(revs, to_query):
                    index_parents = item[3]
                    rev_parents = tuple(revision.parent_ids)
                    if index_parents != rev_parents:
                        result.append((revision.revision_id, index_parents, rev_parents))
        finally:
            pb.finished()
        return result

    @symbol_versioning.deprecated_method(symbol_versioning.one_one)
    def get_parents(self, revision_ids):
        """See graph._StackedParentsProvider.get_parents."""
        parent_map = self.get_parent_map(revision_ids)
        # Revisions missing from the parent map are reported as None.
        return [parent_map.get(r, None) for r in revision_ids]

    def _make_parents_provider(self):
        """Return a CachingParentsProvider wrapping this repository."""
        return graph.CachingParentsProvider(self)

    def _refresh_data(self):
        """Reload the pack collection when the first lock has just been taken.

        The reload happens when the logical write lock count is exactly 1, or
        when the control files hold exactly one lock and it is a read lock.
        """
        if self._write_lock_count == 1 or (
            self.control_files._lock_count == 1 and
            self.control_files._lock_mode == 'r'):
            # forget what names there are
            self._pack_collection.reset()
            # XXX: Better to do an in-memory merge when acquiring a new lock -
            # factor out code from _save_pack_names.
            self._pack_collection.ensure_loaded()

    def _start_write_group(self):
        """Delegate starting the write group to the pack collection."""
        self._pack_collection._start_write_group()

    def _commit_write_group(self):
        """Delegate committing the write group to the pack collection."""
        return self._pack_collection._commit_write_group()

    def get_transaction(self):
        """Return the active transaction.

        While logically write locked this is the repository's own
        transaction; otherwise the control files' transaction is returned.
        """
        if self._write_lock_count:
            return self._transaction
        else:
            return self.control_files.get_transaction()

    def is_locked(self):
        """Return a true value if write locked or the control files are locked."""
        return self._write_lock_count or self.control_files.is_locked()

    def is_write_locked(self):
        """Return the logical write lock count (true when write locked)."""
        return self._write_lock_count

    def lock_write(self, token=None):
        """Take a logical write lock on the repository.

        :param token: accepted for interface compatibility; not used here.
        :raises ReadOnlyError: if the repository is locked but not write
            locked.
        """
        if not self._write_lock_count and self.is_locked():
            raise errors.ReadOnlyError(self)
        self._write_lock_count += 1
        if self._write_lock_count == 1:
            self._transaction = transactions.WriteTransaction()
            for repo in self._fallback_repositories:
                # Writes don't affect fallback repos
                repo.lock_read()
        self._refresh_data()

    def lock_read(self):
        """Take a read lock.

        When already write locked this just bumps the write lock count;
        otherwise the control files and every fallback repository are read
        locked.
        """
        if self._write_lock_count:
            self._write_lock_count += 1
        else:
            self.control_files.lock_read()
            for repo in self._fallback_repositories:
                # Writes don't affect fallback repos
                repo.lock_read()
        self._refresh_data()

    def leave_lock_in_place(self):
        """Not supported: always raises NotImplementedError."""
        # not supported - raise an error
        raise NotImplementedError(self.leave_lock_in_place)

    def dont_leave_lock_in_place(self):
        """Not supported: always raises NotImplementedError."""
        # not supported - raise an error
        raise NotImplementedError(self.dont_leave_lock_in_place)

    @needs_write_lock
    def pack(self):
        """Compress the data within the repository.

        This will pack all the data to a single pack. In future it may
        recompress deltas or do other such expensive operations.
        """
        self._pack_collection.pack()

    @needs_write_lock
    def reconcile(self, other=None, thorough=False):
        """Reconcile this repository.

        :param other: accepted for interface compatibility; not used here.
        :param thorough: passed through to PackReconciler.
        :returns: the PackReconciler after its reconcile() has run.
        """
        from bzrlib.reconcile import PackReconciler
        reconciler = PackReconciler(self, thorough=thorough)
        reconciler.reconcile()
        return reconciler

    def unlock(self):
        """Release one level of locking.

        :raises BzrError: if this would release the last write lock while a
            write group is still active.  In that case the write group is
            aborted and the repository fully unlocked before raising.
        """
        if self._write_lock_count == 1 and self._write_group is not None:
            self.abort_write_group()
            self._transaction = None
            self._write_lock_count = 0
            # lock_write() read-locked every fallback repository when the
            # count reached 1.  The count has just been forced back to 0, so
            # release those locks here as well instead of leaking them on
            # this error path.
            for repo in self._fallback_repositories:
                repo.unlock()
            raise errors.BzrError(
                'Must end write group before releasing write lock on %s'
                % self)
        if self._write_lock_count:
            self._write_lock_count -= 1
            if not self._write_lock_count:
                # Clear the attribute before finishing so the repository
                # never holds a reference to a finished transaction.
                transaction = self._transaction
                self._transaction = None
                transaction.finish()
                for repo in self._fallback_repositories:
                    repo.unlock()
        else:
            self.control_files.unlock()
            for repo in self._fallback_repositories:
                repo.unlock()
1874
1875
1876
class RepositoryFormatPack(MetaDirRepositoryFormat):
    """Format logic for pack structured repositories.

    This repository format has:
     - a list of packs in pack-names
     - packs in packs/NAME.pack
     - indices in indices/NAME.{iix,six,tix,rix}
     - knit deltas in the packs, knit indices mapped to the indices.
     - thunk objects to support the knits programming API.
     - a format marker of its own
     - an optional 'shared-storage' flag
     - an optional 'no-working-trees' flag
     - a LockDir lock
    """

    # Set this attribute in derived classes to control the repository class
    # created by open and initialize.
    repository_class = None
    # Set this attribute in derived classes to control the
    # _commit_builder_class that the repository objects will have passed to
    # their constructor.
    _commit_builder_class = None
    # Set this attribute in derived classes to control the _serializer that the
    # repository objects will have passed to their constructor.
    _serializer = None
    # External references are not supported in pack repositories yet.
    supports_external_lookups = False

    def initialize(self, a_bzrdir, shared=False):
        """Create a pack based repository.

        :param a_bzrdir: bzrdir to contain the new repository; must already
            be initialized.
        :param shared: If true the repository will be initialized as a shared
                       repository.
        :returns: the result of opening the newly created repository.
        """
        mutter('creating repository in %s.', a_bzrdir.transport.base)
        dirs = ['indices', 'obsolete_packs', 'packs', 'upload']
        # A new repository starts with an empty pack-names index.
        builder = GraphIndexBuilder()
        files = [('pack-names', builder.finish())]
        utf8_files = [('format', self.get_format_string())]

        self._upload_blank_content(a_bzrdir, dirs, files, utf8_files, shared)
        return self.open(a_bzrdir=a_bzrdir, _found=True)

    def open(self, a_bzrdir, _found=False, _override_transport=None):
        """See RepositoryFormat.open().

        :param _found: when false, RepositoryFormat.find_format() is called
            on a_bzrdir before opening.
        :param _override_transport: INTERNAL USE ONLY. Allows opening the
                                    repository at a slightly different url
                                    than normal. I.e. during 'upgrade'.
        :returns: a new ``self.repository_class`` instance.
        """
        if not _found:
            # The returned format is not used; the call is kept so that any
            # checking find_format does on a_bzrdir still happens.
            RepositoryFormat.find_format(a_bzrdir)
        if _override_transport is not None:
            repo_transport = _override_transport
        else:
            repo_transport = a_bzrdir.get_repository_transport(None)
        control_files = lockable_files.LockableFiles(repo_transport,
                                'lock', lockdir.LockDir)
        return self.repository_class(_format=self,
                              a_bzrdir=a_bzrdir,
                              control_files=control_files,
                              _commit_builder_class=self._commit_builder_class,
                              _serializer=self._serializer)
1941
1942
1943
class RepositoryFormatKnitPack1(RepositoryFormatPack):
    """A no-subtrees parameterized Pack repository.

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    _serializer = xml5.serializer_v5

    def _get_matching_bzrdir(self):
        """Return the bzrdir format registered as 'pack-0.92'."""
        return bzrdir.format_registry.make_bzrdir('pack-0.92')

    def _ignore_setting_bzrdir(self, format):
        """Property setter that discards the assigned value."""
        pass

    # Reads are computed from the format registry; writes are ignored.
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits without subtree support"

    def check_conversion_target(self, target_format):
        """Perform no checks: no target format is rejected here."""
        pass
2592.3.188 by Robert Collins
Allow pack repositories to have multiple writers active at one time, for greater concurrency.
1971
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
1972
2592.3.224 by Martin Pool
Rename GraphKnitRepository etc to KnitPackRepository
1973
class RepositoryFormatKnitPack3(RepositoryFormatPack):
    """A subtrees parameterized Pack repository.

    This repository format uses the xml7 serializer to get:
     - support for recording full info about the tree root
     - support for recording tree-references

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = True
    _serializer = xml7.serializer_v7

    def _get_matching_bzrdir(self):
        """Return the bzrdir format registered as 'pack-0.92-subtree'."""
        return bzrdir.format_registry.make_bzrdir(
            'pack-0.92-subtree')

    def _ignore_setting_bzrdir(self, format):
        """Property setter that discards the assigned value."""
        pass

    # Reads are computed from the format registry; writes are ignored.
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        """Reject targets lacking rich root or tree reference support.

        :raises BadConversionTarget: if target_format.rich_root_data is
            false, or if target_format has no true
            ``supports_tree_reference`` attribute.
        """
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)
        if not getattr(target_format, 'supports_tree_reference', False):
            raise errors.BadConversionTarget(
                'Does not support nested trees', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with subtree support\n"
2996.2.11 by Aaron Bentley
Implement rich-root-pack format ( #164639)
2013
2014
2015
class RepositoryFormatKnitPack4(RepositoryFormatPack):
    """A rich-root, no subtrees parameterized Pack repository.

    This repository format uses the xml6 serializer to get:
     - support for recording full info about the tree root

    This format was introduced in 1.0.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False
    _serializer = xml6.serializer_v6

    def _get_matching_bzrdir(self):
        """Return the bzrdir format registered as 'rich-root-pack'."""
        return bzrdir.format_registry.make_bzrdir(
            'rich-root-pack')

    def _ignore_setting_bzrdir(self, format):
        """Property setter that discards the assigned value."""
        pass

    # Reads are computed from the format registry; writes are ignored.
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        """Reject targets that do not support rich root data.

        :raises BadConversionTarget: if target_format.rich_root_data is
            false.
        """
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar pack repository format 1 with rich root"
                " (needs bzr 1.0)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with rich root support\n"
3152.2.1 by Robert Collins
* A new repository format 'development' has been added. This format will
2052
2053
3549.1.5 by Martin Pool
Add stable format names for stacked branches
2054
class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Repository that supports external references to allow stacking.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    _serializer = xml5.serializer_v5
    supports_external_lookups = True

    def _get_matching_bzrdir(self):
        """Return the bzrdir format registered as 'development1'."""
        return bzrdir.format_registry.make_bzrdir('development1')

    def _ignore_setting_bzrdir(self, format):
        """Property setter that discards the assigned value."""
        pass

    # Reads are computed from the format registry; writes are ignored.
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 (adds stacking support, requires bzr 1.6)"

    def check_conversion_target(self, target_format):
        """Perform no checks: no target format is rejected here."""
        pass
2086
2087
3549.1.6 by Martin Pool
Change stacked-subtree to stacked-rich-root
2088
class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
    """A repository with rich roots and external references.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    # NOTE(review): RepositoryFormatKnitPack4, the other rich-root format
    # without subtrees, uses xml6.serializer_v6, while this class pairs
    # supports_tree_reference = False with the xml7 serializer.  Left
    # unchanged because the serializer determines what is written to disk;
    # confirm whether xml7 is intended here.
    _serializer = xml7.serializer_v7

    supports_external_lookups = True

    def _get_matching_bzrdir(self):
        """Return the bzrdir format registered as 'development1-subtree'."""
        return bzrdir.format_registry.make_bzrdir(
            'development1-subtree')

    def _ignore_setting_bzrdir(self, format):
        """Property setter that discards the assigned value."""
        pass

    # Reads are computed from the format registry; writes are ignored.
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        """Reject targets that do not support rich root data.

        :raises BadConversionTarget: if target_format.rich_root_data is
            false.
        """
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
3549.1.5 by Martin Pool
Add stable format names for stacked branches
2125
2126
3152.2.1 by Robert Collins
* A new repository format 'development' has been added. This format will
2127
class RepositoryFormatPackDevelopment0(RepositoryFormatPack):
    """A no-subtrees development repository.

    This format should be retained until the second release after bzr 1.0.

    No changes to the disk behaviour from pack-0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    _serializer = xml5.serializer_v5

    def _get_matching_bzrdir(self):
        """Return the bzrdir format registered as 'development0'."""
        return bzrdir.format_registry.make_bzrdir('development0')

    def _ignore_setting_bzrdir(self, format):
        """Property setter that discards the assigned value."""
        pass

    # Reads are computed from the format registry; writes are ignored.
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar development format 0 (needs bzr.dev from before 1.3)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
            "pack-0.92\n")

    def check_conversion_target(self, target_format):
        """Perform no checks: no target format is rejected here."""
        pass
2158
2159
2160
class RepositoryFormatPackDevelopment0Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained until the second release after bzr 1.0.

    No changes to the disk behaviour from pack-0.92-subtree.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = True
    _serializer = xml7.serializer_v7

    def _get_matching_bzrdir(self):
        """Getter for _matchingbzrdir.

        :return: The result of asking the bzrdir format registry to make
            the 'development0-subtree' bzrdir.
        """
        return bzrdir.format_registry.make_bzrdir(
            'development0-subtree')

    def _ignore_setting_bzrdir(self, format):
        """Setter for _matchingbzrdir; the assigned value is discarded."""
        pass

    # Reads always go through the registry lookup above; writes are accepted
    # but have no effect.
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        """Reject targets that lack rich roots or tree references.

        :param target_format: The format being converted to.
        :raises errors.BadConversionTarget: if target_format.rich_root_data
            is false, or if target_format has no true
            supports_tree_reference attribute.
        """
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)
        # getattr with a default: a target without the attribute at all is
        # treated the same as one that sets it to False.
        if not getattr(target_format, 'supports_tree_reference', False):
            raise errors.BadConversionTarget(
                'Does not support nested trees', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 0 with subtree support "
                "(needs bzr.dev from before 1.3)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
                "pack-0.92-subtree\n")
2200
2201
3221.12.1 by Robert Collins
Backport development1 format (stackable packs) to before-shallow-branches.
2202
class RepositoryFormatPackDevelopment1(RepositoryFormatPackDevelopment0):
    """A no-subtrees development repository.

    This format should be retained until the second release after bzr 1.5.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    supports_external_lookups = True

    def _get_matching_bzrdir(self):
        """Getter for _matchingbzrdir.

        :return: The result of asking the bzrdir format registry to make
            the 'development1' bzrdir.
        """
        return bzrdir.format_registry.make_bzrdir('development1')

    def _ignore_setting_bzrdir(self, format):
        """Setter for _matchingbzrdir; the assigned value is discarded."""
        pass

    # Rebuilt here (rather than inherited) so the property is bound to this
    # class's getter, which names 'development1'.
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar development format 1 (needs bzr.dev from before 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
                "pack-0.92 with external reference support.\n")

    def check_conversion_target(self, target_format):
        """Accept any target_format; no check is performed here."""
        pass
2232
2233
3221.11.7 by Robert Collins
Merge in real stacked repository work.
2234
class RepositoryFormatPackDevelopment1Subtree(
        RepositoryFormatPackDevelopment0Subtree):
    """A subtrees development repository.

    This format should be retained until the second release after bzr 1.5.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    supports_external_lookups = True

    def _get_matching_bzrdir(self):
        """Getter for _matchingbzrdir.

        :return: The result of asking the bzrdir format registry to make
            the 'development1-subtree' bzrdir.
        """
        return bzrdir.format_registry.make_bzrdir(
            'development1-subtree')

    def _ignore_setting_bzrdir(self, format):
        """Setter for _matchingbzrdir; the assigned value is discarded."""
        pass

    # Rebuilt here (rather than inherited) so the property is bound to this
    # class's getter, which names 'development1-subtree'.
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        """Reject targets that lack rich roots or tree references.

        :param target_format: The format being converted to.
        :raises errors.BadConversionTarget: if target_format.rich_root_data
            is false, or if target_format has no true
            supports_tree_reference attribute.
        """
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)
        # getattr with a default: a target without the attribute at all is
        # treated the same as one that sets it to False.
        if not getattr(target_format, 'supports_tree_reference', False):
            raise errors.BadConversionTarget(
                'Does not support nested trees', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 1 with subtree support "
                "(needs bzr.dev from before 1.6)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
                "pack-0.92-subtree with external reference support.\n")