~bzr-pqm/bzr/bzr.dev

974.1.27 by aaron.bentley at utoronto
Initial greedy fetch work
1
# Copyright (C) 2005 by Canonical Ltd
2
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
1218 by Martin Pool
- fix up import
16
17
import sys
18
import os
1231 by Martin Pool
- more progress on fetch on top of weaves
19
from cStringIO import StringIO
1218 by Martin Pool
- fix up import
20
974.1.27 by aaron.bentley at utoronto
Initial greedy fetch work
21
import bzrlib.errors
1231 by Martin Pool
- more progress on fetch on top of weaves
22
from bzrlib.trace import mutter, note, warning
23
from bzrlib.branch import Branch, INVENTORY_FILEID, ANCESTRY_FILEID
974.1.28 by aaron.bentley at utoronto
factored install_revisions out of update_revisions, updated test cases for greedy_fetch
24
from bzrlib.progress import ProgressBar
1231 by Martin Pool
- more progress on fetch on top of weaves
25
from bzrlib.xml5 import serializer_v5
26
from bzrlib.osutils import sha_string, split_lines
27
28
"""Copying of history from one branch to another.
29
30
The basic plan is that every branch knows the history of everything
31
that has merged into it.  As the first step of a merge, pull, or
32
branch operation we copy history from the source into the destination
33
branch.
34
35
The copying is done in a slightly complicated order.  We don't want to
36
add a revision to the store until everything it refers to is also
37
stored, so that if a revision is present we can totally recreate it.
38
However, we can't know what files are included in a revision until we
39
read its inventory.  Therefore, we first pull the XML and hold it in
40
memory until we've updated all of the files referenced.
41
"""
42
43
# TODO: Avoid repeatedly opening weaves so many times.
974.1.27 by aaron.bentley at utoronto
Initial greedy fetch work
44
1219 by Martin Pool
- BROKEN: start refactoring fetch code to work well with weaves
45
46
def greedy_fetch(to_branch, from_branch, revision, pb):
47
    f = Fetcher(to_branch, from_branch, revision, pb)
48
    return f.count_copied, f.failed_revisions
49
50
51
class Fetcher(object):
1231 by Martin Pool
- more progress on fetch on top of weaves
52
    """Pull history from one branch to another.
53
54
    revision_limit
55
        If set, pull only up to this revision_id.
56
        """
1219 by Martin Pool
- BROKEN: start refactoring fetch code to work well with weaves
57
    def __init__(self, to_branch, from_branch, revision_limit=None, pb=None):
58
        self.to_branch = to_branch
59
        self.from_branch = from_branch
60
        self.revision_limit = revision_limit
1231 by Martin Pool
- more progress on fetch on top of weaves
61
        self.failed_revisions = []
62
        self.count_copied = 0
1219 by Martin Pool
- BROKEN: start refactoring fetch code to work well with weaves
63
        if pb is None:
64
            self.pb = bzrlib.ui.ui_factory.progress_bar()
65
        else:
66
            self.pb = pb
1231 by Martin Pool
- more progress on fetch on top of weaves
67
        self._load_histories()
68
        revs_to_fetch = self._compare_ancestries()
69
        self._copy_revisions(revs_to_fetch)
70
        # - get a list of revisions that need to be pulled in
71
        # - for each one, pull in that revision file
72
        #   and get the inventory, and store the inventory with right
73
        #   parents.
74
        # - and get the ancestry, and store that with right parents too
75
        # - and keep a note of all file ids and version seen
76
        # - then go through all files; for each one get the weave,
77
        #   and add in all file versions
78
79
80
    def _load_histories(self):
81
        """Load histories of both branches, up to the limit."""
82
        self.from_history = self.from_branch.revision_history()
83
        self.to_history = self.to_branch.revision_history()
1219 by Martin Pool
- BROKEN: start refactoring fetch code to work well with weaves
84
        if self.revision_limit:
1231 by Martin Pool
- more progress on fetch on top of weaves
85
            assert isinstance(revision_limit, basestring)
86
            try:
87
                rev_index = self.from_history.index(revision_limit)
88
            except ValueError:
89
                rev_index = None
90
            if rev_index is not None:
91
                self.from_history = self.from_history[:rev_index + 1]
92
            else:
93
                self.from_history = [revision]
94
            
95
96
    def _compare_ancestries(self):
97
        """Get a list of revisions that must be copied.
98
99
        That is, every revision that's in the ancestry of the source
100
        branch and not in the destination branch."""
101
        if self.from_history:
102
            self.from_ancestry = self.from_branch.get_ancestry(self.from_history[-1])
103
        else:
104
            self.from_ancestry = []
105
        if self.to_history:
106
            self.to_history = self.to_branch.get_ancestry(self.to_history[-1])
107
        else:
108
            self.to_history = []
109
        ss = set(self.to_history)
110
        to_fetch = []
111
        for rev_id in self.from_ancestry:
112
            if rev_id not in ss:
113
                to_fetch.append(rev_id)
1219 by Martin Pool
- BROKEN: start refactoring fetch code to work well with weaves
114
                mutter('need to get revision {%s}', rev_id)
1231 by Martin Pool
- more progress on fetch on top of weaves
115
        mutter('need to get %d revisions in total', len(to_fetch))
116
        return to_fetch
117
                
118
119
120
    def _copy_revisions(self, revs_to_fetch):
121
        for rev_id in revs_to_fetch:
122
            self._copy_one_revision(rev_id)
123
124
125
    def _copy_one_revision(self, rev_id):
126
        """Copy revision and everything referenced by it."""
127
        mutter('copying revision {%s}', rev_id)
128
        rev_xml = self.from_branch.get_revision_xml(rev_id)
129
        inv_xml = self.from_branch.get_inventory_xml(rev_id)
130
        rev = serializer_v5.read_revision_from_string(rev_xml)
131
        inv = serializer_v5.read_inventory_from_string(inv_xml)
132
        assert rev.revision_id == rev_id
133
        assert rev.inventory_sha1 == sha_string(inv_xml)
134
        mutter('  commiter %s, %d parents',
135
               rev.committer,
136
               len(rev.parents))
137
        self._copy_new_texts(rev_id, inv)
138
        self.to_branch.weave_store.add_text(INVENTORY_FILEID, rev_id,
139
                                            split_lines(inv_xml), rev.parents)
140
        self.to_branch.revision_store.add(StringIO(rev_xml), rev_id)
141
1219 by Martin Pool
- BROKEN: start refactoring fetch code to work well with weaves
142
        
1231 by Martin Pool
- more progress on fetch on top of weaves
143
    def _copy_new_texts(self, rev_id, inv):
144
        """Copy any new texts occuring in this revision."""
145
        # TODO: Rather than writing out weaves every time, hold them
146
        # in memory until everything's done?  But this way is nicer
147
        # if it's interrupted.
148
        for path, ie in inv.iter_entries():
149
            if ie.kind != 'file':
150
                continue
151
            if ie.text_version != rev_id:
152
                continue
153
            mutter('%s {%s} is changed in this revision',
154
                   path, ie.file_id)
155
            self._copy_one_text(rev_id, ie.file_id)
156
157
158
    def _copy_one_text(self, rev_id, file_id):
159
        """Copy one file text."""
160
        from_weave = self.from_branch.weave_store.get_weave(file_id)
161
        from_idx = from_weave.lookup(rev_id)
162
        from_parents = map(from_weave.idx_to_name, from_weave.parents(from_idx))
163
        text_lines = from_weave.get(from_idx)
164
        to_weave = self.to_branch.weave_store.get_weave_or_empty(file_id)
165
        if rev_id in to_weave._name_map:
166
            warning('version {%s} already present in weave of file {%s}',
167
                    rev_id, file_id)
168
            return
169
        to_parents = map(to_weave.lookup, from_parents)
170
        to_weave.add(rev_id, to_parents, text_lines)
171
        self.to_branch.weave_store.put_weave(file_id, to_weave)
1219 by Martin Pool
- BROKEN: start refactoring fetch code to work well with weaves
172
    
173
1139 by Martin Pool
- merge in merge improvements and additional tests
174
def has_revision(branch, revision_id):
175
    try:
1231 by Martin Pool
- more progress on fetch on top of weaves
176
        branch.get_revision_xml_file(revision_id)
1139 by Martin Pool
- merge in merge improvements and additional tests
177
        return True
178
    except bzrlib.errors.NoSuchRevision:
179
        return False
180
181
1219 by Martin Pool
- BROKEN: start refactoring fetch code to work well with weaves
182
def old_greedy_fetch(to_branch, from_branch, revision=None, pb=None):
183
    """Copy all history from one branch to another.
184
185
    revision
186
        If set, copy only up to this point in the source branch.
187
188
    @returns: number copied, missing ids       
974.1.29 by aaron.bentley at utoronto
Got greedy fetch working when the desired revision's not in the revision history
189
    """
974.1.27 by aaron.bentley at utoronto
Initial greedy fetch work
190
    from_history = from_branch.revision_history()
974.1.30 by aaron.bentley at utoronto
Changed copy_multi to permit failure and return a tuple, tested missing required revisions
191
    required_revisions = set(from_history)
192
    all_failed = set()
193
    if revision is not None:
194
        required_revisions.add(revision)
974.1.29 by aaron.bentley at utoronto
Got greedy fetch working when the desired revision's not in the revision history
195
        try:
974.1.30 by aaron.bentley at utoronto
Changed copy_multi to permit failure and return a tuple, tested missing required revisions
196
            rev_index = from_history.index(revision)
974.1.29 by aaron.bentley at utoronto
Got greedy fetch working when the desired revision's not in the revision history
197
        except ValueError:
198
            rev_index = None
199
        if rev_index is not None:
200
            from_history = from_history[:rev_index + 1]
201
        else:
974.1.30 by aaron.bentley at utoronto
Changed copy_multi to permit failure and return a tuple, tested missing required revisions
202
            from_history = [revision]
974.1.27 by aaron.bentley at utoronto
Initial greedy fetch work
203
    to_history = to_branch.revision_history()
204
    missing = []
205
    for rev_id in from_history:
206
        if not has_revision(to_branch, rev_id):
207
            missing.append(rev_id)
1219 by Martin Pool
- BROKEN: start refactoring fetch code to work well with weaves
208
209
    # recurse down through the revision graph, looking for things that
210
    # can't be found.
974.1.29 by aaron.bentley at utoronto
Got greedy fetch working when the desired revision's not in the revision history
211
    count = 0
974.1.27 by aaron.bentley at utoronto
Initial greedy fetch work
212
    while len(missing) > 0:
974.1.30 by aaron.bentley at utoronto
Changed copy_multi to permit failure and return a tuple, tested missing required revisions
213
        installed, failed = to_branch.install_revisions(from_branch, 
974.1.33 by aaron.bentley at utoronto
Added greedy_fetch to update_revisions
214
                                                        revision_ids=missing,
215
                                                        pb=pb)
974.1.30 by aaron.bentley at utoronto
Changed copy_multi to permit failure and return a tuple, tested missing required revisions
216
        count += installed
217
        required_failed = failed.intersection(required_revisions)
218
        if len(required_failed) > 0:
219
            raise bzrlib.errors.InstallFailed(required_failed)
220
        for rev_id in failed:
221
            note("Failed to install %s" % rev_id)
222
        all_failed.update(failed)
974.1.49 by Aaron Bentley
TEST NEEDED: fixed fetch when same revision is added twice to new_missing
223
        new_missing = set() 
974.1.27 by aaron.bentley at utoronto
Initial greedy fetch work
224
        for rev_id in missing:
225
            try:
226
                revision = from_branch.get_revision(rev_id)
227
            except bzrlib.errors.NoSuchRevision:
228
                if revision in from_history:
229
                    raise
230
                else:
231
                    continue
232
            for parent in [p.revision_id for p in revision.parents]:
233
                if not has_revision(to_branch, parent):
974.1.49 by Aaron Bentley
TEST NEEDED: fixed fetch when same revision is added twice to new_missing
234
                    new_missing.add(parent)
974.1.27 by aaron.bentley at utoronto
Initial greedy fetch work
235
        missing = new_missing
974.1.30 by aaron.bentley at utoronto
Changed copy_multi to permit failure and return a tuple, tested missing required revisions
236
    return count, all_failed
974.1.27 by aaron.bentley at utoronto
Initial greedy fetch work
237
238
1219 by Martin Pool
- BROKEN: start refactoring fetch code to work well with weaves
239
def old_install_revisions(branch, other, revision_ids, pb):
240
    """Copy revisions from other branch into branch.
241
242
    This is a lower-level function used by a pull or a merge.  It
243
    incorporates some history from one branch into another, but
244
    does not update the revision history or operate on the working
245
    copy.
246
247
    revision_ids
248
        Sequence of revisions to copy.
249
250
    pb
251
        Progress bar for copying.
252
    """
253
    if False:
254
        if hasattr(other.revision_store, "prefetch"):
255
            other.revision_store.prefetch(revision_ids)
256
        if hasattr(other.inventory_store, "prefetch"):
257
            other.inventory_store.prefetch(revision_ids)
258
259
    if pb is None:
260
        pb = bzrlib.ui.ui_factory.progress_bar()
261
262
    revisions = []
263
    needed_texts = set()
264
    i = 0
265
266
    failures = set()
267
    for i, rev_id in enumerate(revision_ids):
268
        pb.update('fetching revision', i+1, len(revision_ids))
269
        try:
270
            rev = other.get_revision(rev_id)
271
        except bzrlib.errors.NoSuchRevision:
272
            failures.add(rev_id)
273
            continue
274
275
        revisions.append(rev)
276
        inv = other.get_inventory(rev_id)
277
        for key, entry in inv.iter_entries():
278
            if entry.text_id is None:
279
                continue
280
            if entry.text_id not in branch.text_store:
281
                needed_texts.add(entry.text_id)
282
283
    pb.clear()
284
285
    count, cp_fail = branch.text_store.copy_multi(other.text_store, 
286
                                                needed_texts)
287
    count, cp_fail = branch.inventory_store.copy_multi(other.inventory_store, 
288
                                                     revision_ids)
289
    count, cp_fail = branch.revision_store.copy_multi(other.revision_store, 
290
                                                    revision_ids,
291
                                                    permit_failure=True)
292
    assert len(cp_fail) == 0 
293
    return count, failures
294
295