~bzr-pqm/bzr/bzr.dev

2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
1
# Copyright (C) 2005, 2006, 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
from bzrlib.lazy_import import lazy_import
18
lazy_import(globals(), """
19
from itertools import izip
20
import math
21
import md5
2592.3.91 by Robert Collins
Incrementally closing in on a correct fetch for packs.
22
import time
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
23
24
from bzrlib import (
2592.3.91 by Robert Collins
Incrementally closing in on a correct fetch for packs.
25
        debug,
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
26
        pack,
2592.3.90 by Robert Collins
Slightly broken, but branch and fetch performance is now roughly on par (for bzr.dev) with knits - should be much faster for large repos.
27
        ui,
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
28
        )
29
from bzrlib.index import (
30
    GraphIndex,
31
    GraphIndexBuilder,
32
    InMemoryGraphIndex,
33
    CombinedGraphIndex,
34
    GraphIndexPrefixAdapter,
35
    )
2592.3.90 by Robert Collins
Slightly broken, but branch and fetch performance is now roughly on par (for bzr.dev) with knits - should be much faster for large repos.
36
from bzrlib.knit import KnitGraphIndex, _PackAccess, _KnitData
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
37
from bzrlib.pack import ContainerWriter
38
from bzrlib.store import revision
39
""")
40
from bzrlib import (
41
    bzrdir,
42
    deprecated_graph,
43
    errors,
44
    knit,
45
    lockable_files,
46
    lockdir,
47
    osutils,
48
    transactions,
49
    xml5,
50
    xml7,
51
    )
52
53
from bzrlib.decorators import needs_read_lock, needs_write_lock
54
from bzrlib.repofmt.knitrepo import KnitRepository, KnitRepository3
55
from bzrlib.repository import (
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
56
    CommitBuilder,
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
57
    MetaDirRepository,
58
    MetaDirRepositoryFormat,
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
59
    RootCommitBuilder,
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
60
    )
61
import bzrlib.revision as _mod_revision
62
from bzrlib.store.revision.knit import KnitRevisionStore
63
from bzrlib.store.versioned import VersionedFileStore
64
from bzrlib.trace import mutter, note, warning
65
66
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
67
class PackCommitBuilder(CommitBuilder):
    """CommitBuilder variant that adds texts using pack semantics.

    Text additions are handed to the repository's pack collection, which
    uses one knit object rather than one knit object per added text. This
    reduces memory and object pressure.
    """

    def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
        """Add a text through the repository's pack collection.

        Forwards the arguments, together with this builder's new revision id
        and its ``random_revid`` value, to
        ``self.repository._packs._add_text_to_weave`` and returns that
        call's result unchanged.
        """
        pack_collection = self.repository._packs
        return pack_collection._add_text_to_weave(
            file_id, self._new_revision_id, new_lines, parents, nostore_sha,
            self.random_revid)
78
79
80
class PackRootCommitBuilder(RootCommitBuilder):
    """RootCommitBuilder variant that adds texts using pack semantics.

    Text additions are handed to the repository's pack collection, which
    uses one knit object rather than one knit object per added text. This
    reduces memory and object pressure.
    """

    def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
        """Add a text through the repository's pack collection.

        Forwards the arguments, together with this builder's new revision id
        and its ``random_revid`` value, to
        ``self.repository._packs._add_text_to_weave`` and returns that
        call's result unchanged.
        """
        add_text = self.repository._packs._add_text_to_weave
        return add_text(file_id, self._new_revision_id, new_lines, parents,
            nostore_sha, self.random_revid)
91
92
2592.3.90 by Robert Collins
Slightly broken, but branch and fetch performance is now roughly on par (for bzr.dev) with knits - should be much faster for large repos.
93
class Pack(object):
    """An in memory proxy for a .pack and its indices."""

    def __init__(self):
        # Every field starts out unset; code that builds a Pack assigns the
        # location (name, transport) and the four indices afterwards.
        self.name = self.transport = None
        self.revision_index = self.inventory_index = None
        self.text_index = self.signature_index = None

    def get_revision_count(self):
        """Return the number of keys in this pack's revision index."""
        revisions = self.revision_index
        return revisions.key_count()
2592.3.90 by Robert Collins
Slightly broken, but branch and fetch performance is now roughly on par (for bzr.dev) with knits - should be much faster for large repos.
106
107
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
108
class RepositoryPackCollection(object):
2592.5.2 by Martin Pool
Add docstring
109
    """Management of packs within a repository."""
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
110
2592.5.5 by Martin Pool
Make RepositoryPackCollection remember the index transport, and responsible for getting a map of indexes
111
    def __init__(self, repo, transport, index_transport):
112
        """Create a new RepositoryPackCollection.
113
114
        :param transport: Addresses the repository base directory 
115
            (typically .bzr/repository/).
116
        :param index_transport: Addresses the directory containing indexes.
117
        """
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
118
        self.repo = repo
119
        self.transport = transport
2592.5.5 by Martin Pool
Make RepositoryPackCollection remember the index transport, and responsible for getting a map of indexes
120
        self._index_transport = index_transport
2592.3.90 by Robert Collins
Slightly broken, but branch and fetch performance is now roughly on par (for bzr.dev) with knits - should be much faster for large repos.
121
        self.packs = []
122
123
    def add_pack_to_memory(self, pack):
124
        """Make a Pack object available to the repository to satisfy queries.
125
        
126
        :param pack: A Pack object.
127
        """
128
        self.packs.append(pack)
129
        if self.repo._revision_all_indices is None:
130
            # to make this function more useful, perhaps we should make an
131
            # all_indices object in future?
132
            pass
133
        else:
134
            self.repo._revision_pack_map[pack.revision_index] = (
135
                pack.transport, pack.name)
136
            self.repo._revision_all_indices.insert_index(0, pack.revision_index)
137
        if self.repo._inv_all_indices is not None:
138
            # inv 'knit' has been used : update it.
139
            self.repo._inv_all_indices.insert_index(0,
140
                pack.inventory_index)
2592.3.104 by Robert Collins
hackish fix, but all tests passing again.
141
            self.repo._inv_pack_map[pack.inventory_index] = pack.transport, pack.name + '.pack'
2592.3.90 by Robert Collins
Slightly broken, but branch and fetch performance is now roughly on par (for bzr.dev) with knits - should be much faster for large repos.
142
        if self.repo._text_all_indices is not None:
143
            # text 'knits' have been used : update it.
144
            self.repo._text_all_indices.insert_index(0,
145
                pack.text_index)
146
        if self.repo._signature_all_indices is not None:
147
            # sigatures 'knit' accessed : update it.
148
            self.repo._signature_all_indices.insert_index(0,
149
                pack.signature_index)
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
150
        
151
    def _add_text_to_weave(self, file_id, revision_id, new_lines, parents,
        nostore_sha, random_revid):
        """Add the lines of one file text using the repository's text knit.

        A GraphIndexPrefixAdapter for the ``(file_id, )`` prefix is built
        over the repository's combined text index, with node additions sent
        to the repository's text write index. The repository's single text
        knit is then re-pointed at that adapter before the lines are added,
        so no new knit object is created per text.

        :return: The first two items of the value returned by the knit's
            add_lines_with_ghosts call.
        """
        repo = self.repo
        prefix_adapter = GraphIndexPrefixAdapter(
            repo._text_all_indices,
            (file_id, ), 1,
            add_nodes_callback=repo._text_write_index.add_nodes)
        # Retarget the knit's index at this file id's view of the indices.
        knit_index = repo._text_knit._index
        knit_index._graph_index = prefix_adapter
        knit_index._add_callback = prefix_adapter.add_nodes
        added = repo._text_knit.add_lines_with_ghosts(
            revision_id, parents, new_lines, nostore_sha=nostore_sha,
            random_id=random_revid, check_content=False)
        return added[0:2]
2592.3.90 by Robert Collins
Slightly broken, but branch and fetch performance is now roughly on par (for bzr.dev) with knits - should be much faster for large repos.
162
163
    def all_pack_details(self):
164
        """Return a list of all the packs as transport,name tuples.
165
166
        :return: A list of (transport, name) tuples for all the packs in the
167
            repository.
168
        """
169
        # XXX: fix me, should be direct rather than indirect
170
        if self.repo._revision_all_indices is None:
171
            # trigger creation of the all revision index.
172
            self.repo._revision_store.get_revision_file(self.repo.get_transaction())
173
        result = []
174
        for index, transport_and_name in self.repo._revision_pack_map.iteritems():
175
            result.append(transport_and_name)
176
        return result
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
177
178
    def autopack(self):
        """Pack the pack collection incrementally.

        No global reorganisation or recompression is attempted; the aim is
        only to stop the total number of packs growing without bound. The
        _max_pack_count method decides whether autopacking is needed, and
        the pack_distribution method decides how many revisions go in each
        pack.

        If autopacking takes place then the packs name collection will have
        been flushed to disk - packing requires updating the name collection
        in synchronisation with certain steps. Otherwise the names collection
        is not flushed.

        :return: True if packing took place.
        """
        repo = self.repo
        if repo._revision_all_indices is None:
            # trigger creation of the all revision index.
            repo._revision_store.get_revision_file(repo.get_transaction())
        total_revisions = repo._revision_all_indices.key_count()
        total_packs = len(self._names)
        if total_packs <= self._max_pack_count(total_revisions):
            # Already at or below the permitted number of packs.
            return False
        # XXX: the following may want to be a class, to pack with a given
        # policy.
        mutter('Auto-packing repository %s, which has %d pack files, '
            'containing %d revisions into %d packs.', self, total_packs,
            total_revisions, self._max_pack_count(total_revisions))
        # determine which packs need changing
        pack_distribution = self.pack_distribution(total_revisions)
        existing_packs = []
        for index, details in repo._revision_pack_map.iteritems():
            if index is not None:
                revision_count = index.key_count()
                # revision less packs are not generated by normal operation,
                # only by operations like sign-my-commits, and thus will not
                # tend to grow rapidly or without bound like commit containing
                # packs do - leave them alone as packing them really should
                # group their data with the relevant commit, and that may
                # involve rewriting ancient history - which autopack tries to
                # avoid. Alternatively we could not group the data but treat
                # each of these as having a single revision, and thus add
                # one revision for each to the total revision count, to get
                # a matching distribution.
                if revision_count != 0:
                    existing_packs.append((revision_count, details))
        pack_operations = self.plan_autopack_combinations(
            existing_packs, pack_distribution)
        self._execute_pack_operations(pack_operations)
        return True
230
2592.3.90 by Robert Collins
Slightly broken, but branch and fetch performance is now roughly on par (for bzr.dev) with knits - should be much faster for large repos.
231
    def create_pack_from_packs(self, revision_index_map, inventory_index_map,
        text_index_map, signature_index_map, suffix, revision_ids=None):
        """Create a new pack by reading data from other packs.

        This does little more than a bulk copy of data. One key difference
        is that data with the same item key across multiple packs is elided
        from the output. The new pack is written into the current pack store
        along with its indices, and the name added to the pack names. The
        source packs are not altered.

        The pack is first streamed to a temporary file on the repository's
        upload transport. Its final name is the md5 hex digest of the bytes
        written, and the temporary file is renamed to
        '../packs/<name>.pack' once the indices have been written.

        :param revision_index_map: A revision index map; its values are
            (transport, name) tuples.
        :param inventory_index_map: A inventory index map.
        :param text_index_map: A text index map.
        :param signature_index_map: A signature index map.
        :param suffix: String appended to the repository lock nonce to form
            the name of the temporary upload file.
        :param revision_ids: Either None, to copy all data, or a list
            of revision_ids to limit the copied data to the data they
            introduced.
        :return: A Pack object, or None if nothing was copied.
        :raises errors.BzrError: If the repository has a non-None
            _open_pack_tuple, i.e. another pack is being written.
        """
        # open a pack - using the same name as the last temporary file
        # - which has already been flushed, so its safe.
        # XXX: - duplicate code warning with start_write_group; fix before
        #      considering 'done'.
        if getattr(self.repo, '_open_pack_tuple', None) is not None:
            raise errors.BzrError('call to create_pack_from_packs while '
                'another pack is being written.')
        if revision_ids is not None and len(revision_ids) == 0:
            # silly fetch request.
            return None
        random_name = self.repo.control_files._lock.nonce + suffix
        # Evaluate the debug flag once so that start_time is guaranteed to be
        # bound whenever one of the timing messages below is emitted.
        debug_fetch = 'fetch' in debug.debug_flags
        if debug_fetch:
            plain_pack_list = ['%s%s' % (transport.base, name) for
                transport, name in revision_index_map.itervalues()]
            if revision_ids is not None:
                rev_count = len(revision_ids)
            else:
                rev_count = 'all'
            mutter('%s: create_pack: creating pack from source packs: '
                '%s%s %s revisions wanted %s t=0',
                time.ctime(), self.repo._upload_transport.base, random_name,
                plain_pack_list, rev_count)
            start_time = time.time()
        write_stream = self.repo._upload_transport.open_write_stream(random_name)
        if debug_fetch:
            mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
                time.ctime(), self.repo._upload_transport.base, random_name,
                time.time() - start_time)
        pack_hash = md5.new()
        # Writes are collected and pushed to the stream (and the hash) in
        # batches of 640 chunks rather than one call per chunk.
        buffer = []
        def write_data(bytes, update=pack_hash.update, write=write_stream.write):
            buffer.append(bytes)
            if len(buffer) == 640:
                bytes = ''.join(buffer)
                write(bytes)
                update(bytes)
                del buffer[:]
        writer = pack.ContainerWriter(write_data)
        writer.begin()
        # open new indices
        revision_index = InMemoryGraphIndex(reference_lists=1)
        inv_index = InMemoryGraphIndex(reference_lists=2)
        text_index = InMemoryGraphIndex(reference_lists=2, key_elements=2)
        signature_index = InMemoryGraphIndex(reference_lists=0)
        # select revisions
        if revision_ids:
            revision_keys = [(revision_id,) for revision_id in revision_ids]
        else:
            revision_keys = None
        revision_nodes = self._index_contents(revision_index_map, revision_keys)
        # copy revision keys and adjust values
        # (_copy_nodes_graph's result is consumed with list() purely to
        # drive the copy.)
        list(self._copy_nodes_graph(revision_nodes, revision_index_map, writer,
            revision_index))
        if debug_fetch:
            mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
                time.ctime(), self.repo._upload_transport.base, random_name,
                revision_index.key_count(),
                time.time() - start_time)
        # select inventory keys
        inv_keys = revision_keys # currently the same keyspace
        inv_nodes = self._index_contents(inventory_index_map, inv_keys)
        # copy inventory keys and adjust values
        # XXX: Should be a helper function to allow different inv representation
        # at this point.
        inv_lines = self._copy_nodes_graph(inv_nodes, inventory_index_map,
            writer, inv_index, output_lines=True)
        if revision_ids:
            # Only texts referenced by the inventories of the requested
            # revisions are copied.
            fileid_revisions = self.repo._find_file_ids_from_xml_inventory_lines(
                inv_lines, revision_ids)
            text_filter = []
            for fileid, file_revids in fileid_revisions.iteritems():
                text_filter.extend(
                    [(fileid, file_revid) for file_revid in file_revids])
        else:
            # Nothing needs the lines, but they must still be consumed so
            # that the inventories are copied.
            list(inv_lines)
            text_filter = None
        if debug_fetch:
            mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
                time.ctime(), self.repo._upload_transport.base, random_name,
                inv_index.key_count(),
                time.time() - start_time)
        # select text keys
        text_nodes = self._index_contents(text_index_map, text_filter)
        # copy text keys and adjust values
        list(self._copy_nodes_graph(text_nodes, text_index_map, writer,
            text_index))
        if debug_fetch:
            mutter('%s: create_pack: file texts copied: %s%s %d items t+%6.3fs',
                time.ctime(), self.repo._upload_transport.base, random_name,
                text_index.key_count(),
                time.time() - start_time)
        # select signature keys
        signature_filter = revision_keys # same keyspace
        signature_nodes = self._index_contents(signature_index_map,
            signature_filter)
        # copy signature keys and adjust values
        self._copy_nodes(signature_nodes, signature_index_map, writer, signature_index)
        if debug_fetch:
            mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
                time.ctime(), self.repo._upload_transport.base, random_name,
                signature_index.key_count(),
                time.time() - start_time)
        # finish the pack
        writer.end()
        if buffer:
            # flush whatever is left over from the last partial batch.
            bytes = ''.join(buffer)
            write_stream.write(bytes)
            pack_hash.update(bytes)
        new_name = pack_hash.hexdigest()
        # if nothing has been written, discard the new pack.
        if 0 == sum((revision_index.key_count(),
            inv_index.key_count(),
            text_index.key_count(),
            signature_index.key_count(),
            )):
            # Close the upload stream before removing the temporary file:
            # otherwise the stream is leaked, and deleting a file that is
            # still open can fail on some platforms.
            write_stream.close()
            self.repo._upload_transport.delete(random_name)
            return None
        result = Pack()
        result.name = new_name
        result.transport = self.repo._upload_transport.clone('../packs/')
        # write indices
        index_transport = self.repo._upload_transport.clone('../indices')
        rev_index_name = self.repo._revision_store.name_to_revision_index_name(new_name)
        revision_index_length = index_transport.put_file(rev_index_name,
            revision_index.finish())
        if debug_fetch:
            # XXX: size might be interesting?
            mutter('%s: create_pack: wrote revision index: %s%s t+%6.3fs',
                time.ctime(), self.repo._upload_transport.base, random_name,
                time.time() - start_time)
        inv_index_name = self.repo._inv_thunk.name_to_inv_index_name(new_name)
        inventory_index_length = index_transport.put_file(inv_index_name,
            inv_index.finish())
        if debug_fetch:
            # XXX: size might be interesting?
            mutter('%s: create_pack: wrote inventory index: %s%s t+%6.3fs',
                time.ctime(), self.repo._upload_transport.base, random_name,
                time.time() - start_time)
        text_index_name = self.repo.weave_store.name_to_text_index_name(new_name)
        text_index_length = index_transport.put_file(text_index_name,
            text_index.finish())
        if debug_fetch:
            # XXX: size might be interesting?
            mutter('%s: create_pack: wrote file texts index: %s%s t+%6.3fs',
                time.ctime(), self.repo._upload_transport.base, random_name,
                time.time() - start_time)
        signature_index_name = self.repo._revision_store.name_to_signature_index_name(new_name)
        signature_index_length = index_transport.put_file(signature_index_name,
            signature_index.finish())
        if debug_fetch:
            # XXX: size might be interesting?
            mutter('%s: create_pack: wrote revision signatures index: %s%s t+%6.3fs',
                time.ctime(), self.repo._upload_transport.base, random_name,
                time.time() - start_time)
        # add to name
        self.allocate(new_name, revision_index_length, inventory_index_length,
            text_index_length, signature_index_length)
        # rename into place. XXX: should rename each index too rather than just
        # uploading blind under the chosen name.
        write_stream.close()
        self.repo._upload_transport.rename(random_name, '../packs/' + new_name + '.pack')
        if debug_fetch:
            # XXX: size might be interesting?
            mutter('%s: create_pack: pack renamed into place: %s%s->%s%s t+%6.3fs',
                time.ctime(), self.repo._upload_transport.base, random_name,
                result.transport, result.name,
                time.time() - start_time)
        result.revision_index = revision_index
        result.inventory_index = inv_index
        result.text_index = text_index
        result.signature_index = signature_index
        if debug_fetch:
            # XXX: size might be interesting?
            mutter('%s: create_pack: finished: %s%s t+%6.3fs',
                time.ctime(), self.repo._upload_transport.base, random_name,
                time.time() - start_time)
        return result
427
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
428
    def _execute_pack_operations(self, pack_operations):
429
        """Execute a series of pack operations.
430
431
        :param pack_operations: A list of [revision_count, packs_to_combine].
432
        :return: None.
433
        """
434
        for revision_count, pack_details in pack_operations:
435
            # we may have no-ops from the setup logic
436
            if len(pack_details) == 0:
437
                continue
438
            # have a progress bar?
439
            self._combine_packs(pack_details)
440
            for pack_detail in pack_details:
2592.3.118 by Robert Collins
Record the size of the index files in the pack-names index.
441
                self._remove_pack_by_name(pack_detail[1])
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
442
        # record the newly available packs and stop advertising the old
443
        # packs
444
        self.save()
445
        # move the old packs out of the way
446
        for revision_count, pack_details in pack_operations:
447
            self._obsolete_packs(pack_details)
448
449
    def pack(self):
        """Pack the pack collection totally.

        Every pack listed in the repository's revision pack map is combined
        by a single pack operation. Nothing is combined when fewer than two
        pack names are known. Unless the repository is in a write group the
        collection is reset on the way out, whether or not packing
        succeeded.
        """
        self.ensure_loaded()
        try:
            total_packs = len(self._names)
            if total_packs < 2:
                return
            if self.repo._revision_all_indices is None:
                # trigger creation of the all revision index.
                self.repo._revision_store.get_revision_file(self.repo.get_transaction())
            total_revisions = self.repo._revision_all_indices.key_count()
            # XXX: the following may want to be a class, to pack with a given
            # policy.
            mutter('Packing repository %s, which has %d pack files, '
                'containing %d revisions into 1 packs.', self, total_packs,
                total_revisions)
            # build the one operation that swallows every existing pack
            combined_revisions = 0
            combined_packs = []
            for index, transport_and_name in self.repo._revision_pack_map.iteritems():
                if index is None:
                    continue
                combined_revisions += index.key_count()
                combined_packs.append(transport_and_name)
            self._execute_pack_operations([[combined_revisions, combined_packs]])
        finally:
            # outside a write group nothing else needs the cached names
            if not self.repo.is_in_write_group():
                self.reset()
478
479
    def plan_autopack_combinations(self, existing_packs, pack_distribution):
        """Plan which existing packs should be combined together.

        Both arguments are modified in place: existing_packs is sorted
        largest first and consumed, and pack_distribution is consumed as
        its buckets are filled.

        :param existing_packs: A list of (revision_count, pack_details)
            tuples.
        :param pack_distribution: A list of the revision counts wanted in
            each pack, as consumed from the front.
        :return: An empty list when there are no more packs than
            distribution entries; otherwise a list of
            [revision_count, [pack_details, ...]] operations, the last of
            which may be empty.
        """
        if len(pack_distribution) >= len(existing_packs):
            return []
        existing_packs.sort(reverse=True)
        pending = [0, []]
        pack_operations = [pending]
        # walk the packs from largest to smallest, deciding which to keep
        # and which to reorganise
        while existing_packs:
            rev_count, details = existing_packs.pop(0)
            if rev_count < pack_distribution[0]:
                # smaller than the bucket at the head of the chart: fold it
                # into the output pack being planned for that position
                pending[0] += rev_count
                pending[1].append(details)
                if pending[0] >= pack_distribution[0]:
                    # this bucket is used up, shift left.
                    del pack_distribution[0]
                    pending = [0, []]
                    pack_operations.append(pending)
                continue
            # already packed at least as well as the chart asks for, so
            # leave it alone and take its size out of the chart
            remaining = rev_count
            while remaining > 0:
                remaining -= pack_distribution[0]
                if remaining < 0:
                    # didn't use that entire bucket up
                    pack_distribution[0] = -remaining
                else:
                    # more to go
                    del pack_distribution[0]
        return pack_operations
513
514
    def _combine_packs(self, pack_details):
515
        """Combine the data from the packs listed in pack_details.
516
517
        This does little more than a bulk copy of data. One key difference
518
        is that data with the same item key across multiple packs is elided
519
        from the output. The new pack is written into the current pack store
520
        along with its indices, and the name added to the pack names. The 
521
        source packs are not altered.
522
523
        :param pack_details: A list of tuples with the transport and pack name
524
            in use.
525
        :return: None
526
        """
527
        # select revision keys
528
        revision_index_map = self._revision_index_map(pack_details)
529
        # select inventory keys
530
        inv_index_map = self._inv_index_map(pack_details)
531
        # select text keys
532
        text_index_map = self._text_index_map(pack_details)
533
        # select signature keys
534
        signature_index_map = self._signature_index_map(pack_details)
2592.3.90 by Robert Collins
Slightly broken, but branch and fetch performance is now roughly on par (for bzr.dev) with knits - should be much faster for large repos.
535
        self.create_pack_from_packs(revision_index_map, inv_index_map,
2592.3.111 by Robert Collins
Make the temporary back for autopacking vs pack to pack fetching distinguishable.
536
            text_index_map, signature_index_map, '.autopack')
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
537
538
    def _copy_nodes(self, nodes, index_map, writer, write_index):
        """Copy the records that nodes describe into a new pack.

        :param nodes: An iterable of (index, key, value) entries. The first
            character of value is carried over as a flag; the rest is read
            as 'offset length' within the source pack.
        :param index_map: A map from index to the (transport, path) of the
            pack that index describes.
        :param writer: Receives each record through add_bytes_record.
        :param write_index: Receives one node per copied record, holding the
            flag and the (pos, size) the writer returned.
        """
        # how to map this into knit.py - or knit.py into this?
        # we don't want the typical knit logic, we want grouping by pack
        # at this point - perhaps a helper library for the following code
        # duplication points?
        by_index = {}
        for index, key, value in sorted(nodes):
            by_index.setdefault(index, []).append((key, value))
        # plan and run one readv per source pack
        for index, entries in by_index.iteritems():
            readv_requests = []
            for key, value in entries:
                # ---- KnitGraphIndex.get_position
                position = value[1:].split(' ')
                readv_requests.append(
                    (int(position[0]), int(position[1]), (key, value[0])))
            # linear scan up the pack
            readv_requests.sort()
            # copy the data
            transport, path = index_map[index]
            ranges = [request[0:2] for request in readv_requests]
            reader = pack.make_readv_reader(transport, path, ranges)
            for (names, read_func), (_offset, _length, (key, eol_flag)) in \
                izip(reader.iter_records(), readv_requests):
                record_bytes = read_func(None)
                pos, size = writer.add_bytes_record(record_bytes, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
569
2592.3.104 by Robert Collins
hackish fix, but all tests passing again.
570
    def _copy_nodes_graph(self, nodes, index_map, writer, write_index,
        output_lines=False):
        """Copy knit nodes between packs.

        Because the body contains a yield this is a generator function:
        no record is read or written until the returned iterator is
        consumed, even when output_lines is False (in which case the
        iterator simply yields nothing while doing the copy).

        :param nodes: An iterable of (index, key, value, references)
            entries. The first character of value is carried over as a
            flag; the rest is read as 'offset length' in the source pack.
        :param index_map: A map from index to the (transport, path) of the
            pack that index describes.
        :param writer: Receives each record through add_bytes_record.
        :param write_index: Receives one node per copied record, holding
            the flag, the (pos, size) the writer returned, and the
            references.
        :param output_lines: Return lines present in the copied data as
            an iterator.
        """
        # for record verification
        knit_data = _KnitData(None)
        # for line extraction when requested (inventories only)
        if output_lines:
            factory = knit.KnitPlainFactory()
        # plan a readv on each source pack:
        # group by pack
        nodes = sorted(nodes)
        # how to map this into knit.py - or knit.py into this?
        # we don't want the typical knit logic, we want grouping by pack
        # at this point - perhaps a helper library for the following code
        # duplication points?
        request_groups = {}
        for index, key, value, references in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value, references))
        for index, items in request_groups.iteritems():
            pack_readv_requests = []
            for key, value, references in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append((offset, length, (key, value[0], references)))
            # linear scan up the pack
            pack_readv_requests.sort()
            # copy the data
            transport, path = index_map[index]
            reader = pack.make_readv_reader(transport, path,
                [offset[0:2] for offset in pack_readv_requests])
            # records are paired with the sorted requests positionally
            for (names, read_func), (_1, _2, (key, eol_flag, references)) in \
                izip(reader.iter_records(), pack_readv_requests):
                raw_data = read_func(None)
                if output_lines:
                    # read the entire thing
                    content, _ = knit_data._parse_record(key[-1], raw_data)
                    # an empty last reference list selects the fulltext
                    # parser; anything else is parsed as a line delta
                    if len(references[-1]) == 0:
                        line_iterator = factory.get_fulltext_content(content)
                    else:
                        line_iterator = factory.get_linedelta_content(content)
                    for line in line_iterator:
                        yield line
                else:
                    # check the header only
                    df, _ = knit_data._parse_record_header(key[-1], raw_data)
                    df.close()
                # the record is written only after its lines were yielded
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)
625
626
    def ensure_loaded(self):
        """Read the 'pack-names' index into self._names if not yet loaded.

        Each entry maps a pack name (the first element of the index key) to
        the list of integers parsed from the space separated index value.
        Nothing is done when self._names is already set.
        """
        if self._names is not None:
            return
        loaded = self._names = {}
        names_index = GraphIndex(self.transport, 'pack-names')
        for _index, key, value in names_index.iter_all_entries():
            loaded[key[0]] = [int(digits) for digits in value.split(' ')]
634
635
    def allocate(self, name, revision_index_length, inventory_index_length,
        text_index_length, signature_index_length):
        """Record name, with its index sizes, in the list of packs.

        :param name: The basename - e.g. the md5 hash hexdigest.
        :param revision_index_length: The length of the revision index in
            bytes.
        :param inventory_index_length: The length of the inventory index in
            bytes.
        :param text_index_length: The length of the text index in bytes.
        :param signature_index_length: The length of the signature index in
            bytes.
        :raises errors.DuplicateKey: If name is already in the list.
        """
        self.ensure_loaded()
        known = self._names
        if name in known:
            raise errors.DuplicateKey(name)
        index_lengths = (revision_index_length, inventory_index_length,
            text_index_length, signature_index_length)
        known[name] = index_lengths
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
653
2592.5.5 by Martin Pool
Make RepositoryPackCollection remember the index transport, and responsible for getting a map of indexes
654
    def _make_index_map(self, suffix):
655
        """Return information on existing indexes.
656
657
        :param suffix: Index suffix added to pack name.
658
659
        :returns: (pack_map, indices) where indices is a list of GraphIndex 
660
        objects, and pack_map is a mapping from those objects to the 
661
        pack tuple they describe.
662
        """
663
        indices = []
664
        pack_map = {}
665
        self.ensure_loaded()
666
        for name in self.names():
667
            # TODO: maybe this should expose size to us  to allow
668
            # sorting of the indices for better performance ?
669
            index_name = name + suffix
670
            new_index = GraphIndex(self._index_transport, index_name)
671
            indices.append(new_index)
672
            pack_map[new_index] = self.repo._pack_tuple(name)
673
        return pack_map, indices
674
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
675
    def _max_pack_count(self, total_revisions):
676
        """Return the maximum number of packs to use for total revisions.
677
        
678
        :param total_revisions: The total number of revisions in the
679
            repository.
680
        """
681
        if not total_revisions:
682
            return 1
683
        digits = str(total_revisions)
684
        result = 0
685
        for digit in digits:
686
            result += int(digit)
687
        return result
688
689
    def names(self):
        """Return the known pack names as a sorted list."""
        known = self._names.keys()
        return sorted(known)
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
692
693
    def _obsolete_packs(self, pack_details):
694
        """Move a number of packs which have been obsoleted out of the way.
695
696
        Each pack and its associated indices are moved out of the way.
697
698
        Note: for correctness this function should only be called after a new
699
        pack names index has been written without these pack names, and with
700
        the names of packs that contain the data previously available via these
701
        packs.
702
703
        :param pack_details: The transport, name tuples for the packs.
704
        :param return: None.
705
        """
706
        for pack_detail in pack_details:
707
            pack_detail[0].rename(pack_detail[1],
708
                '../obsolete_packs/' + pack_detail[1])
709
            basename = pack_detail[1][:-4]
710
            index_transport = pack_detail[0].clone('../indices')
711
            for suffix in ('iix', 'six', 'tix', 'rix'):
712
                index_transport.rename(basename + suffix,
713
                    '../obsolete_packs/' + basename + suffix)
714
715
    def pack_distribution(self, total_revisions):
        """Generate a list of the number of revisions to put in each pack.

        Each decimal digit d at position 10**k contributes d packs of 10**k
        revisions; the list runs from the largest pack size to the smallest.
        Zero revisions gives [0].

        :param total_revisions: The total number of revisions in the
            repository.
        """
        if total_revisions == 0:
            return [0]
        digits = str(total_revisions)
        sizes = []
        exponent = len(digits)
        # walk from the most significant digit down
        for digit in digits:
            exponent -= 1
            sizes.extend([10 ** exponent] * int(digit))
        return sizes
730
2592.3.118 by Robert Collins
Record the size of the index files in the pack-names index.
731
    def _remove_pack_by_name(self, name):
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
732
        # strip .pack
2592.3.118 by Robert Collins
Record the size of the index files in the pack-names index.
733
        self._names.pop(name[:-5])
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
734
735
    def reset(self):
        """Forget the loaded pack names and empty the packs list."""
        self._names, self.packs = None, []
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
738
739
    def _inv_index_map(self, pack_details):
740
        """Get a map of inv index -> packs for pack_details."""
741
        # the simplest thing for now is to create new index objects.
742
        # this should really reuse the existing index objects for these 
743
        # packs - this means making the way they are managed in the repo be 
744
        # more sane.
745
        indices = {}
746
        for transport, name in pack_details:
747
            index_name = name[:-5]
748
            index_name = self.repo._inv_thunk.name_to_inv_index_name(index_name)
749
            indices[GraphIndex(transport.clone('../indices'), index_name)] = \
750
                (transport, name)
751
        return indices
752
753
    def _revision_index_map(self, pack_details):
754
        """Get a map of revision index -> packs for pack_details."""
755
        # the simplest thing for now is to create new index objects.
756
        # this should really reuse the existing index objects for these 
757
        # packs - this means making the way they are managed in the repo be 
758
        # more sane.
759
        indices = {}
760
        for transport, name in pack_details:
761
            index_name = name[:-5]
762
            index_name = self.repo._revision_store.name_to_revision_index_name(index_name)
763
            indices[GraphIndex(transport.clone('../indices'), index_name)] = \
764
                (transport, name)
765
        return indices
766
767
    def _signature_index_map(self, pack_details):
768
        """Get a map of signature index -> packs for pack_details."""
769
        # the simplest thing for now is to create new index objects.
770
        # this should really reuse the existing index objects for these 
771
        # packs - this means making the way they are managed in the repo be 
772
        # more sane.
773
        indices = {}
774
        for transport, name in pack_details:
775
            index_name = name[:-5]
776
            index_name = self.repo._revision_store.name_to_signature_index_name(index_name)
777
            indices[GraphIndex(transport.clone('../indices'), index_name)] = \
778
                (transport, name)
779
        return indices
780
781
    def _text_index_map(self, pack_details):
782
        """Get a map of text index -> packs for pack_details."""
783
        # the simplest thing for now is to create new index objects.
784
        # this should really reuse the existing index objects for these 
785
        # packs - this means making the way they are managed in the repo be 
786
        # more sane.
787
        indices = {}
788
        for transport, name in pack_details:
789
            index_name = name[:-5]
790
            index_name = self.repo.weave_store.name_to_text_index_name(index_name)
791
            indices[GraphIndex(transport.clone('../indices'), index_name)] = \
792
                (transport, name)
793
        return indices
794
2592.3.93 by Robert Collins
Steps toward filtering revisions/inventories/texts during fetch.
795
    def _index_contents(self, pack_map, key_filter=None):
        """Get an iterable of the index contents from a pack_map.

        The indices that key pack_map are searched together through one
        CombinedGraphIndex.

        :param pack_map: A map from indices to pack details.
        :param key_filter: An optional filter to limit the
            keys returned. When None, every entry is returned.
        """
        combined = CombinedGraphIndex(list(pack_map.iterkeys()))
        if key_filter is not None:
            return combined.iter_entries(key_filter)
        return combined.iter_all_entries()
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
808
809
    def save(self):
        """Write the current pack names out as the 'pack-names' index.

        Each name becomes a one element key whose value is the space
        separated list of its recorded sizes.
        """
        names_builder = GraphIndexBuilder()
        for name, sizes in self._names.iteritems():
            encoded_sizes = ' '.join(map(str, sizes))
            names_builder.add_node((name, ), encoded_sizes)
        self.transport.put_file('pack-names', names_builder.finish())
814
815
    def setup(self):
816
        # cannot add names if we're not in a 'write lock'.
817
        if self.repo.control_files._lock_mode != 'w':
818
            raise errors.NotWriteLocked(self)
819
2592.5.6 by Martin Pool
Move pack repository start_write_group to pack collection object
820
    def _start_write_group(self):
        """Open a new upload pack and prepare the stores for writing.

        The upload file is named after the nonce of the lock held on the
        repository's control files. Everything written to the pack
        container is also fed to an md5 hash, which is kept on the
        repository as _open_pack_hash.
        """
        repo = self.repo
        upload_name = repo.control_files._lock.nonce + '.pack'
        repo._open_pack_tuple = (repo._upload_transport, upload_name)
        upload_stream = repo._upload_transport.open_write_stream(upload_name)
        repo._write_stream = upload_stream
        pack_hash = md5.new()
        repo._open_pack_hash = pack_hash
        # bind both callables once; they stay tied to this stream and hash
        # even if the repository attributes are later replaced
        stream_write = upload_stream.write
        hash_update = pack_hash.update
        def write_data(bytes):
            stream_write(bytes)
            hash_update(bytes)
        repo._open_pack_writer = pack.ContainerWriter(write_data)
        repo._open_pack_writer.begin()
        self.setup()
        repo._revision_store.setup()
        repo.weave_store.setup()
        repo._inv_thunk.setup()
836
2592.5.7 by Martin Pool
move commit_write_group to RepositoryPackCollection
837
    def _commit_write_group(self):
        """Finish the open upload pack, publishing it if it holds data.

        When any of the revision store, weave store or inventory thunk
        reports inserted data, the container is ended, the stores flush
        their indices, the upload is renamed into '../packs/' under the
        hexdigest of the pack hash, and the name is allocated. The names
        are then saved unless autopack() returns a true value. When no
        data was inserted the pending upload is deleted instead.

        In both cases the stores and this collection are reset and the
        repository's pack hash and write stream are cleared.
        """
        data_inserted = (self.repo._revision_store.data_inserted() or
            self.repo.weave_store.data_inserted() or
            self.repo._inv_thunk.data_inserted())
        if data_inserted:
            self.repo._open_pack_writer.end()
            # the pack is named after the hash of everything written to it
            new_name = self.repo._open_pack_hash.hexdigest()
            new_pack = Pack()
            new_pack.name = new_name
            new_pack.transport = self.repo._upload_transport.clone('../packs/')
            # To populate:
            # new_pack.revision_index =
            # new_pack.inventory_index =
            # new_pack.text_index =
            # new_pack.signature_index =
            # NOTE(review): the *_index_length attributes read by allocate()
            # below are presumably all set on new_pack by these flush calls
            # - only the revision one is visible here; confirm the others.
            self.repo.weave_store.flush(new_name, new_pack)
            self.repo._inv_thunk.flush(new_name, new_pack)
            self.repo._revision_store.flush(new_name, new_pack)
            self.repo._write_stream.close()
            self.repo._upload_transport.rename(self.repo._open_pack_tuple[1],
                '../packs/' + new_name + '.pack')
            # If this fails, it's a hash collision. We should:
            # - determine if it's a collision or
            # - the same content or
            # - the existing name is not the actual hash - e.g.
            #   it's a deliberate attack or data corruption has
            #   occurred during the write of that file.
            self.allocate(new_name, new_pack.revision_index_length,
                new_pack.inventory_index_length, new_pack.text_index_length,
                new_pack.signature_index_length)
            self.repo._open_pack_tuple = None
            # save the names ourselves only when autopack reports it did
            # nothing (a false return value)
            if not self.autopack():
                self.save()
        else:
            # remove the pending upload
            self.repo._upload_transport.delete(self.repo._open_pack_tuple[1])
        self.repo._revision_store.reset()
        self.repo.weave_store.reset()
        self.repo._inv_thunk.reset()
        # forget what names there are - should just refresh and deal with the
        # delta.
        self.reset()
        self.repo._open_pack_hash = None
        self.repo._write_stream = None
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
881
882
class GraphKnitRevisionStore(KnitRevisionStore):
883
    """An object to adapt access from RevisionStore's to use GraphKnits.
884
885
    This should not live through to production: by production time we should
886
    have fully integrated the new indexing and have new data for the
887
    repository classes; also we may choose not to do a Knit1 compatible
888
    new repository, just a Knit3 one. If neither of these happen, this 
889
    should definitely be cleaned up before merging.
890
891
    This class works by replacing the original RevisionStore.
892
    We need to do this because the GraphKnitRevisionStore is less
893
    isolated in its layering - it uses services from the repo.
894
    """
895
896
    def __init__(self, repo, transport, revisionstore):
        """Create a GraphKnitRevisionStore on repo with revisionstore.

        The versioned file store and the serializer are taken over from
        revisionstore. State is kept in the Repository, the indices
        FileNames provide a KnitGraphIndex, and new indices are written
        at the end of transactions.

        :param repo: The repository this store keeps its state on.
        :param transport: Kept as self.transport.
        :param revisionstore: The revision store being replaced.
        """
        KnitRevisionStore.__init__(self, revisionstore.versioned_file_store)
        self.transport = transport
        self._serializer = revisionstore._serializer
        self.repo = repo
907
908
    def get_revision_file(self, transaction):
        """Get the revision versioned file object.

        The knit is built once and kept on the repository as
        _revision_knit; later calls return that object. Building it also
        sets _revision_all_indices, _revision_pack_map and
        _revision_knit_access on the repository.

        :param transaction: Not used by this implementation.
        """
        repo = self.repo
        cached = getattr(repo, '_revision_knit', None)
        if cached is not None:
            return cached
        repo._packs.ensure_loaded()
        pack_map, indices = repo._packs._make_index_map('.rix')
        # by default no data-adding is permitted.
        writer = None
        add_callback = None
        if repo.is_in_write_group():
            # allow writing: queue writes to a new index
            write_index = repo._revision_write_index
            indices.insert(0, write_index)
            pack_map[write_index] = repo._open_pack_tuple
            writer = repo._open_pack_writer, write_index
            add_callback = write_index.add_nodes
        all_indices = CombinedGraphIndex(indices)
        repo._revision_all_indices = all_indices
        knit_index = KnitGraphIndex(all_indices, add_callback=add_callback)
        knit_access = _PackAccess(pack_map, writer)
        repo._revision_pack_map = pack_map
        repo._revision_knit_access = knit_access
        repo._revision_knit = knit.KnitVersionedFile(
            'revisions', self.transport.clone('..'),
            repo.control_files._file_mode,
            create=False, access_mode=repo.control_files._lock_mode,
            index=knit_index, delta=False, factory=knit.KnitPlainFactory(),
            access_method=knit_access)
        return repo._revision_knit
936
937
    def get_signature_file(self, transaction):
        """Get the signature versioned file object.

        The knit is built once and kept on the repository as
        _signature_knit; later calls return that object. Building it also
        sets _signature_all_indices and _signature_knit_access on the
        repository.

        :param transaction: Not used by this implementation.
        """
        repo = self.repo
        cached = getattr(repo, '_signature_knit', None)
        if cached is not None:
            return cached
        repo._packs.ensure_loaded()
        indices = []
        pack_map = {}
        for name in repo._packs.names():
            # TODO: maybe this should expose size to us  to allow
            # sorting of the indices for better performance ?
            index_name = self.name_to_signature_index_name(name)
            signature_index = GraphIndex(self.transport, index_name)
            indices.append(signature_index)
            pack_map[signature_index] = repo._pack_tuple(name)
        # by default no data-adding is permitted.
        writer = None
        add_callback = None
        if repo.is_in_write_group():
            # allow writing: queue writes to a new index
            write_index = repo._signature_write_index
            indices.insert(0, write_index)
            pack_map[write_index] = repo._open_pack_tuple
            writer = repo._open_pack_writer, write_index
            add_callback = write_index.add_nodes
        all_indices = CombinedGraphIndex(indices)
        repo._signature_all_indices = all_indices
        knit_index = KnitGraphIndex(all_indices,
            add_callback=add_callback, parents=False)
        knit_access = _PackAccess(pack_map, writer)
        repo._signature_knit_access = knit_access
        repo._signature_knit = knit.KnitVersionedFile(
            'signatures', self.transport.clone('..'),
            repo.control_files._file_mode,
            create=False, access_mode=repo.control_files._lock_mode,
            index=knit_index, delta=False, factory=knit.KnitPlainFactory(),
            access_method=knit_access)
        return repo._signature_knit
971
972
    def data_inserted(self):
        """Return True if either write index has had keys added to it.

        The revision write index is consulted first, then the signature
        write index. An index that is missing from the repository, or is
        false, counts as holding no data.
        """
        for attribute in ('_revision_write_index', '_signature_write_index'):
            write_index = getattr(self.repo, attribute, None)
            if write_index and write_index.key_count():
                return True
        return False
980
2592.3.118 by Robert Collins
Record the size of the index files in the pack-names index.
981
    def flush(self, new_name, new_pack):
        """Write out pending indices.

        The revision index and then the signature index are finished and
        written through self.transport (even when empty). The value returned
        by transport.put_file for each is stored on new_pack.

        :param new_name: Pack name; the index file names are derived from it
            with name_to_revision_index_name / name_to_signature_index_name.
        :param new_pack: Object that receives the revision_index_length and
            signature_index_length attributes.
        """
        repo = self.repo

        # write a revision index (might be empty)
        rix_name = self.name_to_revision_index_name(new_name)
        new_pack.revision_index_length = self.transport.put_file(
            rix_name, repo._revision_write_index.finish())
        if repo._revision_all_indices is None:
            # create a pack map for the autopack code - XXX finish
            # making a clear managed list of packs, indices and use
            # that in these mapping classes
            # NOTE(review): unlike the branch below, this path leaves
            # repo._revision_write_index set - confirm that is intended.
            repo._revision_pack_map = repo._packs._make_index_map('.rix')[0]
        else:
            # the in-memory write index is replaced by the on-disk one in
            # both the pack map and the combined index.
            del repo._revision_pack_map[repo._revision_write_index]
            repo._revision_write_index = None
            rix_index = GraphIndex(self.transport, rix_name)
            repo._revision_pack_map[rix_index] = repo._pack_tuple(new_name)
            # revisions 'knit' accessed : update it.
            repo._revision_all_indices.insert_index(0, rix_index)
            # remove the write buffering index. XXX: API break
            # - clearly we need a remove_index call too.
            # (after the insert above, the previous first entry - expected
            # to be the write index - sits at position 1.)
            del repo._revision_all_indices._indices[1]
            # reset the knit access writer
            repo._revision_knit_access.set_writer(None, None, (None, None))

        # write a signatures index (might be empty)
        six_name = self.name_to_signature_index_name(new_name)
        new_pack.signature_index_length = self.transport.put_file(
            six_name, repo._signature_write_index.finish())
        repo._signature_write_index = None
        if repo._signature_all_indices is not None:
            # signatures 'knit' accessed : update it.
            repo._signature_all_indices.insert_index(0,
                GraphIndex(self.transport, six_name))
            # remove the write buffering index. XXX: API break
            # - clearly we need a remove_index call too.
            del repo._signature_all_indices._indices[1]
            # reset the knit access writer
            repo._signature_knit_access.set_writer(None, None, (None, None))
1019
1020
    def name_to_revision_index_name(self, name):
        """The revision index is the name + .rix.

        :param name: A pack name.
        :return: name with the '.rix' suffix appended.
        """
        return name + '.rix'
1023
1024
    def name_to_signature_index_name(self, name):
        """The signature index is the name + .six.

        :param name: A pack name.
        :return: name with the '.six' suffix appended.
        """
        return name + '.six'
1027
1028
    def reset(self):
        """Clear all cached data.

        Sets the knit, write index, combined index and knit access object
        that this store keeps on the repository to None, for both revisions
        and signatures.
        """
        repo = self.repo
        # cached revision data
        repo._revision_knit = None
        repo._revision_write_index = None
        repo._revision_all_indices = None
        repo._revision_knit_access = None
        # cached signature data
        repo._signature_knit = None
        repo._signature_write_index = None
        repo._signature_all_indices = None
        repo._signature_knit_access = None
1040
1041
    def setup(self):
        """Create the in-memory write indices and hook them into live knits.

        New InMemoryGraphIndex objects are stored on the repository for
        revisions and signatures. For each of the two knits that has already
        been handed out (is not None on the repository), the new write index
        is placed first in its combined index, becomes the knit index's add
        callback, and the knit access object is pointed at the open pack.
        """
        repo = self.repo
        # setup in-memory indices to accumulate data.
        repo._revision_write_index = InMemoryGraphIndex(1)
        repo._signature_write_index = InMemoryGraphIndex(0)
        # if knit indices have been handed out, add a mutable
        # index to them
        if repo._revision_knit is not None:
            revision_index = repo._revision_write_index
            repo._revision_all_indices.insert_index(0, revision_index)
            repo._revision_knit._index._add_callback = revision_index.add_nodes
            repo._revision_knit_access.set_writer(repo._open_pack_writer,
                revision_index, repo._open_pack_tuple)
        if repo._signature_knit is not None:
            signature_index = repo._signature_write_index
            repo._signature_all_indices.insert_index(0, signature_index)
            repo._signature_knit._index._add_callback = signature_index.add_nodes
            repo._signature_knit_access.set_writer(repo._open_pack_writer,
                signature_index, repo._open_pack_tuple)
1057
1058
1059
class GraphKnitTextStore(VersionedFileStore):
    """An object to adapt access from VersionedFileStore's to use GraphKnits.

    This should not live through to production: by production time we should
    have fully integrated the new indexing and have new data for the
    repository classes; also we may choose not to do a Knit1 compatible
    new repository, just a Knit3 one. If neither of these happen, this
    should definitely be cleaned up before merging.

    This class works by replacing the original VersionedFileStore.
    We need to do this because the GraphKnitRevisionStore is less
    isolated in its layering - it uses services from the repo and shares them
    with all the data written in a single write group.
    """

    def __init__(self, repo, transport, weavestore):
        """Create a GraphKnitTextStore on repo with weavestore.

        This will store its state in the Repository, use the
        indices FileNames to provide a KnitGraphIndex,
        and at the end of transactions write new indices.

        :param repo: Repository whose _text_* attributes hold the state.
        :param transport: Transport used for the '.tix' index files.
        :param weavestore: Store kept as self.weavestore; its _transport is
            reused as self._transport.
        """
        # don't call base class constructor - its not suitable.
        # no transient data stored in the transaction
        # cache.
        self._precious = False
        self.repo = repo
        self.transport = transport
        self.weavestore = weavestore
        # XXX for check() which isn't updated yet
        self._transport = weavestore._transport

    def data_inserted(self):
        """Report whether text data is pending.

        :return: True if the repository's text write index is set and reports
            a non-zero key_count(); False otherwise.
        """
        # XXX: Should we define __len__ for indices?
        write_index = getattr(self.repo, '_text_write_index', None)
        if write_index and write_index.key_count():
            return True
        return False

    def _ensure_all_index(self, for_write=None):
        """Create the combined index for all texts.

        Does nothing when the repository already has a combined text index.

        :param for_write: When true, the text write index is placed first in
            the combined index even if no write group is reported as open.
        """
        if getattr(self.repo, '_text_all_indices', None) is not None:
            return
        indices = []
        self.repo._packs.ensure_loaded()
        self.repo._text_pack_map = {}
        for name in self.repo._packs.names():
            # TODO: maybe this should expose size to us  to allow
            # sorting of the indices for better performance ?
            index_name = self.name_to_text_index_name(name)
            indices.append(GraphIndex(self.transport, index_name))
            self.repo._text_pack_map[indices[-1]] = (self.repo._pack_tuple(name))
        if for_write or self.repo.is_in_write_group():
            # allow writing: queue writes to a new index
            indices.insert(0, self.repo._text_write_index)
        # Publish the combined index before building the knit:
        # _setup_knit(True) calls get_weave_or_empty, which calls back into
        # this method and must find the index already present, otherwise the
        # calls would recurse without end.
        self.repo._text_all_indices = CombinedGraphIndex(indices)
        self._setup_knit(self.repo.is_in_write_group())

    def flush(self, new_name, new_pack):
        """Write the index out to new_name.

        :param new_name: Pack name the '.tix' file name is derived from.
        :param new_pack: Object that receives text_index_length, the value
            returned by transport.put_file.
        """
        # write a text index (might be empty)
        new_index_name = self.name_to_text_index_name(new_name)
        new_pack.text_index_length = self.transport.put_file(
            new_index_name, self.repo._text_write_index.finish())
        self.repo._text_write_index = None
        self._setup_knit(False)
        if self.repo._text_all_indices is not None:
            # text 'knits' have been used, replace the mutated memory index
            # with the new on-disk one. XXX: is this really a good idea?
            # perhaps just keep using the memory one ?
            self.repo._text_all_indices.insert_index(0,
                GraphIndex(self.transport, new_index_name))
            # remove the write buffering index. XXX: API break
            # - clearly we need a remove_index call too.
            del self.repo._text_all_indices._indices[1]

    def get_weave_or_empty(self, file_id, transaction, force_write=False):
        """Get a 'Knit' backed by the .tix indices.

        The transaction parameter is ignored.

        :param file_id: File id; used as the key prefix for the index adapter
            and in the knit's name.
        :param force_write: When true, wire the knit for writing even if no
            write group is reported as open.
        :return: A knit.KnitVersionedFile using the repository's text knit
            access object.
        """
        self._ensure_all_index()
        if force_write or self.repo.is_in_write_group():
            add_callback = self.repo._text_write_index.add_nodes
            self.repo._text_pack_map[self.repo._text_write_index] = self.repo._open_pack_tuple
        else:
            add_callback = None # no data-adding permitted.

        file_id_index = GraphIndexPrefixAdapter(self.repo._text_all_indices,
            (file_id, ), 1, add_nodes_callback=add_callback)
        knit_index = KnitGraphIndex(file_id_index,
            add_callback=file_id_index.add_nodes,
            deltas=True, parents=True)
        return knit.KnitVersionedFile('text:' + file_id, None,
            None,
            index=knit_index,
            access_method=self.repo._text_knit_access)

    get_weave = get_weave_or_empty

    def __iter__(self):
        """Generate a list of the fileids inserted, for use by check."""
        self._ensure_all_index()
        ids = set()
        for index, key, value, refs in self.repo._text_all_indices.iter_all_entries():
            # the first key element is the file id.
            ids.add(key[0])
        return iter(ids)

    def name_to_text_index_name(self, name):
        """The text index is the name + .tix."""
        return name + '.tix'

    def reset(self):
        """Clear all cached data."""
        # remove any accumulating index of text data
        self.repo._text_write_index = None
        # no access object.
        self.repo._text_knit_access = None
        # no write-knit
        self.repo._text_knit = None
        # remove all constructed text data indices
        self.repo._text_all_indices = None
        # and the pack map
        self.repo._text_pack_map = None

    def setup(self):
        """Create the in-memory text write index and prepare for writes."""
        # setup in-memory indices to accumulate data.
        self.repo._text_write_index = InMemoryGraphIndex(reference_lists=2,
            key_elements=2)
        # we require that text 'knits' be accessed from within the write
        # group to be able to be written to, simply because it makes this
        # code cleaner - we don't need to track all 'open' knits and
        # adjust them.
        # prepare to do writes.
        # NOTE(review): if a combined index already exists here,
        # _ensure_all_index returns early and the new write index is not
        # added to it - confirm callers reset() before a new write group.
        self._ensure_all_index(True)
        self._setup_knit(True)

    def _setup_knit(self, for_write):
        """Rebuild the text knit access object and the shared write knit.

        :param for_write: When true the access object gets a writer made of
            the open pack writer and the text write index, and a reusable
            'all-texts' knit is stored on the repository; otherwise there is
            no writer and the stored knit is cleared.
        """
        if for_write:
            writer = (self.repo._open_pack_writer, self.repo._text_write_index)
        else:
            writer = None
        self.repo._text_knit_access = _PackAccess(
            self.repo._text_pack_map, writer)
        if for_write:
            # a reused knit object for commit specifically.
            self.repo._text_knit = self.get_weave_or_empty(
                'all-texts', self.repo.get_transaction(), for_write)
        else:
            self.repo._text_knit = None
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
1208
1209
1210
class InventoryKnitThunk(object):
    """An object to manage thunking get_inventory_weave to pack based knits."""

    def __init__(self, repo, transport):
        """Create an InventoryKnitThunk for repo at transport.

        This will store its state in the Repository, use the
        indices FileNames to provide a KnitGraphIndex,
        and at the end of transactions write a new index.

        :param repo: Repository whose _inv_* attributes hold the state.
        :param transport: Transport used for the '.iix' index files.
        """
        self.repo = repo
        self.transport = transport

    def data_inserted(self):
        """Report whether inventory data is pending.

        :return: True if the repository's inventory write index is set and
            reports a non-zero key_count(); False otherwise.
        """
        # XXX: Should we define __len__ for indices?
        write_index = getattr(self.repo, '_inv_write_index', None)
        if write_index and write_index.key_count():
            return True
        return False

    def _ensure_all_index(self):
        """Create the combined index for all inventories.

        Does nothing when the repository already has a combined inventory
        index. Also stores the index-to-pack map on the repository.
        """
        if getattr(self.repo, '_inv_all_indices', None) is not None:
            return
        indices = []
        self.repo._packs.ensure_loaded()
        pack_map = {}
        for name in self.repo._packs.names():
            # TODO: maybe this should expose size to us  to allow
            # sorting of the indices for better performance ?
            index_name = self.name_to_inv_index_name(name)
            indices.append(GraphIndex(self.transport, index_name))
            pack_map[indices[-1]] = (self.repo._pack_tuple(name))
        if self.repo.is_in_write_group():
            # allow writing: queue writes to a new index.
            # It must be the first entry: setup() inserts it at position 0
            # and flush() removes whatever was first before its own insert.
            indices.insert(0, self.repo._inv_write_index)
        self.repo._inv_all_indices = CombinedGraphIndex(indices)
        self.repo._inv_pack_map = pack_map

    def flush(self, new_name, new_pack):
        """Write the index out to new_name.

        :param new_name: Pack name the '.iix' file name is derived from.
        :param new_pack: Object that receives inventory_index_length, the
            value returned by transport.put_file.
        """
        # write an index (might be empty)
        new_index_name = self.name_to_inv_index_name(new_name)
        new_pack.inventory_index_length = self.transport.put_file(
            new_index_name, self.repo._inv_write_index.finish())
        self.repo._inv_write_index = None
        if self.repo._inv_all_indices is not None:
            # inv 'knit' has been used, replace the mutated memory index
            # with the new on-disk one. XXX: is this really a good idea?
            # perhaps just keep using the memory one ?
            self.repo._inv_all_indices.insert_index(0,
                GraphIndex(self.transport, new_index_name))
            # remove the write buffering index. XXX: API break
            # - clearly we need a remove_index call too.
            del self.repo._inv_all_indices._indices[1]
            self.repo._inv_knit_access.set_writer(None, None, (None, None))
        # NOTE(review): the pack map is dropped while _inv_all_indices may
        # stay set, so a later get_weave() without reset() would see a None
        # map - confirm callers reset first.
        self.repo._inv_pack_map = None

    def get_weave(self):
        """Get a 'Knit' that contains inventory data.

        Inside a write group the knit is wired to the inventory write index
        and the open pack writer; otherwise it has no add callback and no
        writer. The access object is also stored on the repository.

        :return: A knit.KnitVersionedFile named 'inventory'.
        """
        self._ensure_all_index()
        if self.repo.is_in_write_group():
            add_callback = self.repo._inv_write_index.add_nodes
            self.repo._inv_pack_map[self.repo._inv_write_index] = self.repo._open_pack_tuple
            writer = self.repo._open_pack_writer, self.repo._inv_write_index
        else:
            add_callback = None # no data-adding permitted.
            writer = None

        knit_index = KnitGraphIndex(self.repo._inv_all_indices,
            add_callback=add_callback,
            deltas=True, parents=True)
        # TODO - mode support. self.weavestore._file_mode,
        knit_access = _PackAccess(self.repo._inv_pack_map, writer)
        self.repo._inv_knit_access = knit_access
        return knit.KnitVersionedFile('inventory', self.transport.clone('..'),
            index=knit_index,
            factory=knit.KnitPlainFactory(),
            access_method=knit_access)

    def name_to_inv_index_name(self, name):
        """The inv index is the name + .iix."""
        return name + '.iix'

    def reset(self):
        """Clear all cached data."""
        # remove any accumulating index of inv data
        self.repo._inv_write_index = None
        # remove all constructed inv data indices
        self.repo._inv_all_indices = None
        # remove the knit access object
        self.repo._inv_knit_access = None
        self.repo._inv_pack_map = None

    def setup(self):
        """Create the in-memory inventory write index for a write group."""
        # setup in-memory indices to accumulate data.
        # - we want to map compression only, but currently the knit code hasn't
        # been updated enough to understand that, so we have a regular 2-list
        # index giving parents and compression source.
        self.repo._inv_write_index = InMemoryGraphIndex(reference_lists=2)
        # if we have created an inventory index, add the new write index to it
        if getattr(self.repo, '_inv_all_indices', None) is not None:
            self.repo._inv_all_indices.insert_index(0, self.repo._inv_write_index)
            # we don't bother updating the knit layer, because there is not
            # defined interface for adding inventories that should need the
            # existing knit to be changed - its all behind 'repo.add_inventory'.
1316
1317
1318
class GraphKnitRepository1(KnitRepository):
    """Experimental graph-knit using repository."""

    _commit_builder_class = PackCommitBuilder

    def __init__(self, _format, a_bzrdir, control_files, _revision_store,
                 control_store, text_store):
        """Build the repository and swap in the pack-backed stores.

        After the KnitRepository constructor runs, the revision store and
        weave store it set up are wrapped by GraphKnitRevisionStore and
        GraphKnitTextStore, and an InventoryKnitThunk is created. All three
        use the 'indices' sub-transport of control_files._transport.
        """
        KnitRepository.__init__(self, _format, a_bzrdir, control_files,
                              _revision_store, control_store, text_store)
        base_transport = control_files._transport
        index_transport = base_transport.clone('indices')
        self._packs = RepositoryPackCollection(self, base_transport,
                index_transport)
        self._revision_store = GraphKnitRevisionStore(self, index_transport,
            self._revision_store)
        self.weave_store = GraphKnitTextStore(self, index_transport,
            self.weave_store)
        self._inv_thunk = InventoryKnitThunk(self, index_transport)
        self._upload_transport = base_transport.clone('upload')
        self._pack_transport = base_transport.clone('packs')
        # for tests
        self._reconcile_does_inventory_gc = False

    def _abort_write_group(self):
        """Drop all cached store and pack state and the open pack hash."""
        # FIXME: just drop the transient index.
        # the last entry, self._packs, forgets what names there are.
        for cache in (self._revision_store, self.weave_store,
                      self._inv_thunk, self._packs):
            cache.reset()
        self._open_pack_hash = None

    def _pack_tuple(self, name):
        """Return a tuple with the transport and file name for a pack name."""
        return self._pack_transport, name + '.pack'

    def _refresh_data(self):
        """Reset cached store and pack state when the lock count is 1."""
        if self.control_files._lock_count==1:
            # the last entry, self._packs, forgets what names there are.
            for cache in (self._revision_store, self.weave_store,
                          self._inv_thunk, self._packs):
                cache.reset()

    def _start_write_group(self):
        """Delegate starting a write group to the pack collection."""
        self._packs._start_write_group()

    def _commit_write_group(self):
        """Delegate committing the write group to the pack collection."""
        return self._packs._commit_write_group()

    def get_inventory_weave(self):
        """Return the inventory knit provided by the inventory thunk."""
        return self._inv_thunk.get_weave()

    @needs_write_lock
    def pack(self):
        """Compress the data within the repository.

        This will pack all the data to a single pack. In future it may
        recompress deltas or do other such expensive operations.
        """
        self._packs.pack()

    @needs_write_lock
    def reconcile(self, other=None, thorough=False):
        """Reconcile this repository.

        :param other: Not used by this implementation.
        :param thorough: Passed through to PackReconciler.
        :return: The PackReconciler after its reconcile() has run.
        """
        from bzrlib.reconcile import PackReconciler
        reconciler = PackReconciler(self, thorough=thorough)
        reconciler.reconcile()
        return reconciler

    def reconcile_actions(self):
        """Return a set of actions taken by reconcile on this repository.

        Pack repositories currently perform no reconciliation.

        :return: A set of actions. e.g. set(['inventory_gc']).
        """
        return set([])
1393
1394
1395
class GraphKnitRepository3(KnitRepository3):
    """Experimental graph-knit using subtrees repository."""

    _commit_builder_class = PackRootCommitBuilder

    def __init__(self, _format, a_bzrdir, control_files, _revision_store,
                 control_store, text_store):
        """Build the repository and swap in the pack-backed stores.

        After the KnitRepository3 constructor runs, the revision store and
        weave store it set up are wrapped by GraphKnitRevisionStore and
        GraphKnitTextStore, and an InventoryKnitThunk is created. All three
        use the 'indices' sub-transport of control_files._transport.
        """
        KnitRepository3.__init__(self, _format, a_bzrdir, control_files,
                              _revision_store, control_store, text_store)
        base_transport = control_files._transport
        index_transport = base_transport.clone('indices')
        self._packs = RepositoryPackCollection(self, base_transport,
            index_transport)
        self._revision_store = GraphKnitRevisionStore(self, index_transport,
            self._revision_store)
        self.weave_store = GraphKnitTextStore(self, index_transport,
            self.weave_store)
        self._inv_thunk = InventoryKnitThunk(self, index_transport)
        self._upload_transport = base_transport.clone('upload')
        self._pack_transport = base_transport.clone('packs')
        # for tests
        self._reconcile_does_inventory_gc = False

    def _abort_write_group(self):
        """Drop all cached store and pack state and the open pack hash."""
        # FIXME: just drop the transient index.
        # the last entry, self._packs, forgets what names there are.
        for cache in (self._revision_store, self.weave_store,
                      self._inv_thunk, self._packs):
            cache.reset()
        self._open_pack_hash = None

    def _pack_tuple(self, name):
        """Return a tuple with the transport and file name for a pack name."""
        return self._pack_transport, name + '.pack'

    def _refresh_data(self):
        """Reset cached store and pack state when the lock count is 1."""
        if self.control_files._lock_count==1:
            # the last entry, self._packs, forgets what names there are.
            for cache in (self._revision_store, self.weave_store,
                          self._inv_thunk, self._packs):
                cache.reset()

    def _start_write_group(self):
        """Delegate starting a write group to the pack collection."""
        self._packs._start_write_group()

    def _commit_write_group(self):
        """Delegate committing the write group to the pack collection."""
        return self._packs._commit_write_group()

    def get_inventory_weave(self):
        """Return the inventory knit provided by the inventory thunk."""
        return self._inv_thunk.get_weave()

    @needs_write_lock
    def pack(self):
        """Compress the data within the repository.

        This will pack all the data to a single pack. In future it may
        recompress deltas or do other such expensive operations.
        """
        self._packs.pack()

    @needs_write_lock
    def reconcile(self, other=None, thorough=False):
        """Reconcile this repository.

        :param other: Not used by this implementation.
        :param thorough: Passed through to PackReconciler.
        :return: The PackReconciler after its reconcile() has run.
        """
        from bzrlib.reconcile import PackReconciler
        reconciler = PackReconciler(self, thorough=thorough)
        reconciler.reconcile()
        return reconciler

    def reconcile_actions(self):
        """Return a set of actions taken by reconcile on this repository.

        This implementation always returns an empty set.

        :return: A set of actions. e.g. set(['inventory_gc']).
        """
        return set([])
1468
1469
1470
class RepositoryFormatPack(MetaDirRepositoryFormat):
    """Format logic for pack structured repositories.

    This repository format has:
     - a list of packs in pack-names
     - packs in packs/NAME.pack
     - indices in indices/NAME.{iix,six,tix,rix}
     - knit deltas in the packs, knit indices mapped to the indices.
     - thunk objects to support the knits programming API.
     - a format marker of its own
     - an optional 'shared-storage' flag
     - an optional 'no-working-trees' flag
     - a LockDir lock

    Subclasses are expected to provide repository_class and
    get_format_string(); both are used here but not defined here.
    """

    def _get_control_store(self, repo_transport, control_files):
        """Return the control store for this repository."""
        return VersionedFileStore(
            repo_transport,
            prefixed=False,
            file_mode=control_files._file_mode,
            versionedfile_class=knit.KnitVersionedFile,
            versionedfile_kwargs={'factory':knit.KnitPlainFactory()},
            )

    def _get_revision_store(self, repo_transport, control_files):
        """See RepositoryFormat._get_revision_store()."""
        versioned_file_store = VersionedFileStore(
            repo_transport,
            file_mode=control_files._file_mode,
            prefixed=False,
            precious=True,
            versionedfile_class=knit.KnitVersionedFile,
            versionedfile_kwargs={'delta':False,
                                  'factory':knit.KnitPlainFactory(),
                                 },
            escaped=True,
            )
        return KnitRevisionStore(versioned_file_store)

    def _get_text_store(self, transport, control_files):
        """See RepositoryFormat._get_text_store()."""
        return self._get_versioned_file_store('knits',
                                  transport,
                                  control_files,
                                  versionedfile_class=knit.KnitVersionedFile,
                                  versionedfile_kwargs={
                                      'create_parent_dir':True,
                                      'delay_create':True,
                                      'dir_mode':control_files._dir_mode,
                                  },
                                  escaped=True)

    def initialize(self, a_bzrdir, shared=False):
        """Create a pack based repository.

        :param a_bzrdir: bzrdir to contain the new repository; must already
            be initialized.
        :param shared: If true the repository will be initialized as a shared
                       repository.
        :return: The result of open() on a_bzrdir.
        """
        mutter('creating repository in %s.', a_bzrdir.transport.base)
        dirs = ['indices', 'obsolete_packs', 'packs', 'upload']
        # pack-names starts out as a finished, empty graph index.
        builder = GraphIndexBuilder()
        files = [('pack-names', builder.finish())]
        utf8_files = [('format', self.get_format_string())]

        self._upload_blank_content(a_bzrdir, dirs, files, utf8_files, shared)
        return self.open(a_bzrdir=a_bzrdir, _found=True)

    def open(self, a_bzrdir, _found=False, _override_transport=None):
        """See RepositoryFormat.open().

        :param a_bzrdir: bzrdir containing the repository.
        :param _found: When false, the format found in a_bzrdir is looked up
            and asserted to be of this format's class.
        :param _override_transport: INTERNAL USE ONLY. Allows opening the
                                    repository at a slightly different url
                                    than normal. I.e. during 'upgrade'.
        :return: An instance of self.repository_class.
        """
        if not _found:
            found_format = RepositoryFormat.find_format(a_bzrdir)
            assert found_format.__class__ ==  self.__class__
        if _override_transport is not None:
            repo_transport = _override_transport
        else:
            repo_transport = a_bzrdir.get_repository_transport(None)
        control_files = lockable_files.LockableFiles(repo_transport,
                                'lock', lockdir.LockDir)
        text_store = self._get_text_store(repo_transport, control_files)
        control_store = self._get_control_store(repo_transport, control_files)
        _revision_store = self._get_revision_store(repo_transport, control_files)
        return self.repository_class(_format=self,
                              a_bzrdir=a_bzrdir,
                              control_files=control_files,
                              _revision_store=_revision_store,
                              control_store=control_store,
                              text_store=text_store)
1565
1566
1567
class RepositoryFormatGraphKnit1(RepositoryFormatPack):
    """Experimental pack based repository with knit1 style data.

    Repositories of this format are opened as GraphKnitRepository1 and are
    paired with the 'experimental' bzrdir format.

    This repository format has:
     - knits for file texts and inventory
     - hash subdirectory based stores.
     - knits for revisions and signatures
     - uses a GraphKnitIndex for revisions.knit.
     - TextStores for revisions and signatures.
     - a format marker of its own
     - an optional 'shared-storage' flag
     - an optional 'no-working-trees' flag
     - a LockDir lock

    NOTE(review): the list above names both knits and TextStores for
    revisions and signatures; confirm which one applies to this format.

    This format was introduced in bzr.dev.
    """

    # Concrete repository class instantiated for this format.
    repository_class = GraphKnitRepository1

    def _ignore_setting_bzrdir(self, format):
        """Discard any attempt to assign _matchingbzrdir."""

    def _get_matching_bzrdir(self):
        """Return the 'experimental' bzrdir format from the registry."""
        return bzrdir.format_registry.make_bzrdir('experimental')

    # Looked up from the registry on every read; writes are ignored.
    _matchingbzrdir = property(fget=_get_matching_bzrdir,
                               fset=_ignore_setting_bzrdir)

    def __ne__(self, other):
        """Return True unless other is of exactly the same class as self."""
        return not (self.__class__ is other.__class__)

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return 'Experimental no-subtrees'

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return 'Bazaar Experimental no-subtrees\n'

    def check_conversion_target(self, target_format):
        """Accept every target format; no checks are performed."""
1607
1608
1609
class RepositoryFormatGraphKnit3(RepositoryFormatPack):
    """Experimental repository with knit3 style data.

    Repositories of this format are opened as GraphKnitRepository3 and are
    paired with the 'experimental-subtree' bzrdir format.

    This repository format has:
     - knits for file texts and inventory
     - hash subdirectory based stores.
     - knits for revisions and signatures
     - uses a GraphKnitIndex for revisions.knit.
     - TextStores for revisions and signatures.
     - a format marker of its own
     - an optional 'shared-storage' flag
     - an optional 'no-working-trees' flag
     - a LockDir lock
     - support for recording full info about the tree root
     - support for recording tree-references
    """

    # Concrete repository class instantiated for this format.
    repository_class = GraphKnitRepository3
    # Capability flags; check_conversion_target() requires both of these on
    # any conversion target.
    rich_root_data = True
    supports_tree_reference = True

    def _get_matching_bzrdir(self):
        """Return the 'experimental-subtree' bzrdir format."""
        return bzrdir.format_registry.make_bzrdir('experimental-subtree')

    def _ignore_setting_bzrdir(self, format):
        """Discard any attempt to assign _matchingbzrdir."""
        pass

    # Looked up from the registry on every read; writes are ignored.
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        """Check that target_format can hold this format's data.

        :param target_format: The repository format being converted to.
        :raises errors.BadConversionTarget: If target_format lacks rich root
            data or does not support tree references.
        """
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)
        # Formats that do not declare supports_tree_reference at all are
        # treated as not supporting it.
        if not getattr(target_format, 'supports_tree_reference', False):
            raise errors.BadConversionTarget(
                'Does not support nested trees', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar Experimental subtrees\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        # This is the subtree-capable format, so it must not describe itself
        # as "no-subtrees". The description carries no trailing newline, in
        # line with RepositoryFormatGraphKnit1's description.
        return "Experimental subtrees"