~bzr-pqm/bzr/bzr.dev

0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
1
#!/usr/bin/env python
2
"""\
3
Read in a changeset output, and process it into a Changeset object.
4
"""
5
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
6
from bzrlib.tree import Tree
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
7
import pprint
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
8
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
9
from bzrlib.trace import mutter
0.5.62 by John Arbash Meinel
Doing some internal validation before allowing processing to continue, additional checks at the command level.
10
from bzrlib.errors import BzrError
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
11
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
12
class BadChangeset(Exception):
    """Base class for every error raised while parsing a changeset."""


class MalformedHeader(BadChangeset):
    """The header section of the changeset could not be parsed."""


class MalformedPatches(BadChangeset):
    """The patches section of the changeset could not be parsed."""


class MalformedFooter(BadChangeset):
    """The footer section of the changeset could not be parsed."""
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
16
0.5.11 by John Arbash Meinel
Working on properly representing renames.
17
def _unescape(name):
18
    """Now we want to find the filename effected.
19
    Unfortunately the filename is written out as
20
    repr(filename), which means that it surrounds
21
    the name with quotes which may be single or double
22
    (single is preferred unless there is a single quote in
23
    the filename). And some characters will be escaped.
24
25
    TODO:   There has to be some pythonic way of undo-ing the
26
            representation of a string rather than using eval.
27
    """
28
    delimiter = name[0]
29
    if name[-1] != delimiter:
30
        raise BadChangeset('Could not properly parse the'
31
                ' filename: %r' % name)
32
    # We need to handle escaped hexadecimals too.
33
    return name[1:-1].replace('\"', '"').replace("\'", "'")
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
34
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
35
class RevisionInfo(object):
    """The values read from a changeset for a single revision.

    Everything except the revision id starts out as None and is
    filled in afterwards.
    """
    def __init__(self, rev_id):
        self.rev_id = rev_id
        for field in ('sha1', 'committer', 'date', 'timestamp',
                      'timezone', 'inventory_id', 'inventory_sha1',
                      'parents', 'message'):
            setattr(self, field, None)

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def as_revision(self):
        """Build a bzrlib Revision out of the stored values.

        The message is stored as a sequence of lines, which are joined
        with newlines. Each entry of parents is a string holding a
        revision id and a sha1 separated by whitespace.

        :return: The new Revision, with one RevisionReference
            appended to its parents for every entry in self.parents.
        """
        from bzrlib.revision import Revision, RevisionReference
        revision = Revision(revision_id=self.rev_id,
            committer=self.committer,
            timestamp=float(self.timestamp),
            timezone=int(self.timezone),
            inventory_id=self.inventory_id,
            inventory_sha1=self.inventory_sha1,
            message='\n'.join(self.message))

        # parents may be None (or empty) when nothing was listed
        for entry in self.parents or ():
            parent_id, parent_sha1 = entry.split()
            revision.parents.append(
                    RevisionReference(parent_id, parent_sha1))

        return revision
70
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
71
class ChangesetInfo(object):
    """Holder for the meta information of a changeset.

    This is the stuff that allows you to recreate the revision or
    inventory XML.
    """
    def __init__(self):
        self.committer = None
        self.date = None
        self.message = None
        self.base = None
        self.base_sha1 = None

        # A list of RevisionInfo objects
        self.revisions = []

        self.actions = []

        # Everything below is filled in by complete_info() and
        # other post-read functions.

        # A list of real Revision objects
        self.real_revisions = []

        self.timestamp = None
        self.timezone = None

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def complete_info(self):
        """Fill in every value that was left out of the changeset.

        Revisions which do not carry their own date, message or
        committer take the changeset-wide one, a missing inventory id
        defaults to the revision id, and real_revisions is rebuilt
        from the completed entries. When no base was given it is
        taken from the first parent of the first revision.
        """
        from common import unpack_highres_date

        # Put in all of the guessable information.
        if self.date and not self.timestamp:
            self.timestamp, self.timezone = unpack_highres_date(self.date)

        self.real_revisions = []
        for info in self.revisions:
            if info.timestamp is None:
                if info.date is None:
                    # No date of its own, use the changeset-wide one
                    info.timestamp = self.timestamp
                    info.timezone = self.timezone
                else:
                    info.timestamp, info.timezone = \
                            unpack_highres_date(info.date)
            if self.message and info.message is None:
                info.message = self.message
            if self.committer and info.committer is None:
                info.committer = self.committer
            if info.inventory_id is None:
                info.inventory_id = info.rev_id
            self.real_revisions.append(info.as_revision())

        if self.base is not None:
            return

        # When we don't have a base, then the real base
        # is the first parent of the first revision listed
        first = self.real_revisions[0]
        if not first.parents:
            # No base was listed and the lowest revision has no
            # parent, so this is probably against the empty tree
            # and the base truly is None
            self.base = None
            self.base_sha1 = None
            return

        parent = first.parents[0]
        self.base = parent.revision_id
        # In general, if self.base is None, self.base_sha1 should
        # also be None
        if self.base_sha1 is not None:
            assert self.base_sha1 == parent.revision_sha1
        self.base_sha1 = parent.revision_sha1

    def _get_target(self):
        """Return the target revision."""
        if self.real_revisions:
            return self.real_revisions[-1].revision_id
        if self.revisions:
            return self.revisions[-1].rev_id
        return None

    target = property(_get_target, doc='The target revision id')
154
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
155
class ChangesetReader(object):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
156
    """This class reads in a changeset from a file, and returns
157
    a Changeset object, which can then be applied against a tree.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
158
    """
159
    def __init__(self, from_file):
        """Read the changeset out of from_file and validate it.

        :param from_file: A file-like object (must have iterator support).
        """
        object.__init__(self)
        self.info = ChangesetInfo()
        self.from_file = from_file
        # The line held back by _next() so that it can be peeked at
        self._next_line = None

        # The actual inventory ids are kept in the footer, which makes
        # the patch easier to read for humans. The price is that
        # everything has to be read before a proper changeset can be
        # created.
        self._read()
        self._validate()
175
176
    def _read(self):
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
177
        self._read_header()
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
178
        self._read_patches()
179
        self._read_footer()
180
0.5.62 by John Arbash Meinel
Doing some internal validation before allowing processing to continue, additional checks at the command level.
181
    def _validate(self):
182
        """Make sure that the information read in makes sense
183
        and passes appropriate checksums.
184
        """
185
        # Fill in all the missing blanks for the revisions
186
        # and generate the real_revisions list.
187
        self.info.complete_info()
188
        self._validate_revisions()
189
190
    def _validate_revisions(self):
        """Make sure all revision entries match their checksum.

        Every real revision is packed with pack_xml into a buffer,
        which is hashed with sha_file and compared against the sha1
        that was read for that revision. Afterwards every parent
        reference which points at one of the revisions in this
        changeset is checked against the measured hash.

        :raises BzrError: If a measured sha1 differs from the supplied
            one, if a revision id is listed twice, or if a parent
            reference carries the wrong sha1.
        """
        from bzrlib.xml import pack_xml
        from cStringIO import StringIO
        from bzrlib.osutils import sha_file

        # This is a mapping from each revision id to its sha hash
        rev_to_sha1 = {}

        for rev, rev_info in zip(self.info.real_revisions, self.info.revisions):
            assert rev.revision_id == rev_info.rev_id
            sio = StringIO()
            pack_xml(rev, sio)
            sio.seek(0)
            sha1 = sha_file(sio)
            if sha1 != rev_info.sha1:
                raise BzrError('Revision checksum mismatch.'
                    ' For rev_id {%s} supplied sha1 (%s) != measured (%s)'
                    % (rev.revision_id, rev_info.sha1, sha1))
            if rev.revision_id in rev_to_sha1:
                raise BzrError('Revision {%s} given twice in the list'
                        % (rev.revision_id,))
            rev_to_sha1[rev.revision_id] = sha1

        # Now that we've checked all the sha1 sums, we can make sure that
        # at least for the small list we have, all of the references are
        # valid.
        for rev in self.info.real_revisions:
            for parent in rev.parents:
                # Parents outside of this changeset cannot be checked here
                if parent.revision_id in rev_to_sha1:
                    if parent.revision_sha1 != rev_to_sha1[parent.revision_id]:
                        raise BzrError('Parent revision checksum mismatch.'
                                ' A parent was referenced with an'
                                ' incorrect checksum'
                                ': {%r} %s != %s' % (parent.revision_id,
                                            parent.revision_sha1,
                                            rev_to_sha1[parent.revision_id]))
227
228
    def _validate_references_from_branch(self, branch):
        """Now that we have a branch which should have some of the
        revisions we care about, go through and validate all of them
        that we can.

        Every revision and inventory hash mentioned by the changeset is
        collected, and each one that is present in the stores of the
        branch is compared against the hash the branch reports.

        :param branch: The branch to validate against. Its
            revision_store, inventory_store, get_revision_sha1 and
            get_inventory_sha1 are used.
        :raises BzrError: If an id is referenced with two different
            hashes, if a null revision has a hash, or if a hash
            differs from the one the branch has.
        """
        rev_to_sha = {}
        inv_to_sha = {}
        def add_sha(d, rev_id, sha1):
            """Record sha1 for rev_id in d, refusing conflicting hashes."""
            if rev_id is None:
                if sha1 is not None:
                    raise BzrError('A Null revision should always'
                        ' have a null sha1 hash')
                return
            if rev_id in d:
                # This really should have been validated as part
                # of _validate_revisions but lets do it again
                if sha1 != d[rev_id]:
                    raise BzrError('** Revision %r referenced with 2 different'
                            ' sha hashes %s != %s' % (rev_id,
                                sha1, d[rev_id]))
            else:
                d[rev_id] = sha1

        add_sha(rev_to_sha, self.info.base, self.info.base_sha1)
        # All of the contained revisions were checked
        # in _validate_revisions
        checked = {}
        for rev_info in self.info.revisions:
            checked[rev_info.rev_id] = True
            add_sha(rev_to_sha, rev_info.rev_id, rev_info.sha1)

        for rev in self.info.real_revisions:
            # Use the inventory of the revision being visited, not
            # the one left over from the previous loop.
            add_sha(inv_to_sha, rev.inventory_id, rev.inventory_sha1)
            for parent in rev.parents:
                add_sha(rev_to_sha, parent.revision_id, parent.revision_sha1)

        count = 0
        missing = {}
        for rev_id, sha1 in rev_to_sha.items():
            if rev_id in branch.revision_store:
                local_sha1 = branch.get_revision_sha1(rev_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For revision id {%s}'
                            ' local: %s, cset: %s' % (rev_id, local_sha1, sha1))
                else:
                    count += 1
            elif rev_id not in checked:
                # Neither in the branch nor part of this changeset
                missing[rev_id] = sha1

        for inv_id, sha1 in inv_to_sha.items():
            if inv_id in branch.inventory_store:
                local_sha1 = branch.get_inventory_sha1(inv_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For inventory id {%s}'
                            ' local: %s, cset: %s' % (inv_id, local_sha1, sha1))
                else:
                    count += 1

        if len(missing) > 0:
            # I don't know if this is an error yet
            from bzrlib.trace import warning
            warning('Not all revision hashes could be validated.'
                    ' Unable to validate %d hashes' % len(missing))
        mutter('Verified %d sha hashes for the changeset.' % count)
292
293
    def _validate_inventory(self, inv):
        """At this point we should have generated the ChangesetTree,
        so build up an inventory, and make sure the hashes match.

        :param inv: The inventory to check, must not be None.
        :raises BzrError: If the sha1 of the packed inventory differs
            from the inventory_sha1 of the target revision. The packed
            inventory is written to the file ',,bogus-inv' in the
            current directory before raising, to help debugging.
        """
        from bzrlib.xml import pack_xml
        from cStringIO import StringIO
        from bzrlib.osutils import sha_file

        assert inv is not None

        # Now we should have a complete inventory entry.
        sio = StringIO()
        pack_xml(inv, sio)
        sio.seek(0)
        sha1 = sha_file(sio)
        # Target revision is the last entry in the real_revisions list
        rev = self.info.real_revisions[-1]
        if sha1 != rev.inventory_sha1:
            # Close the dump explicitly, rather than relying on the
            # file object being garbage collected.
            bogus = open(',,bogus-inv', 'wb')
            try:
                bogus.write(sio.getvalue())
            finally:
                bogus.close()
            raise BzrError('Inventory sha hash mismatch.')
313
0.5.63 by John Arbash Meinel
Moving the validation into part of the reading.
314
        
0.5.83 by John Arbash Meinel
Tests pass. Now ChangesetTree has it's own inventory.
315
    def get_changeset(self, branch):
        """Validate the changeset against branch and build its tree.

        :param branch: The branch used to validate the references, and
            which supplies the revision tree of the base revision.
        :return: (info, tree), the meta information and a
            ChangesetTree, which can be used to populate the local
            stores and the working tree, respectively.
        """
        self._validate_references_from_branch(branch)

        base_tree = branch.revision_tree(self.info.base)
        tree = ChangesetTree(base_tree)
        self._update_tree(tree)

        # The inventory is only available once the tree was updated
        self._validate_inventory(tree.inventory)

        return self.info, tree
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
327
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
328
    def _next(self):
329
        """yield the next line, but secretly
330
        keep 1 extra line for peeking.
331
        """
332
        for line in self.from_file:
333
            last = self._next_line
334
            self._next_line = line
335
            if last is not None:
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
336
                #mutter('yielding line: %r' % last)
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
337
                yield last
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
338
        last = self._next_line
339
        self._next_line = None
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
340
        #mutter('yielding line: %r' % last)
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
341
        yield last
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
342
343
    def _read_header(self):
        """Read the bzr header.

        Lines are skipped until one that starts the expected header is
        seen, then the remaining expected header lines have to follow
        exactly. Everything after that, up to the first blank line, is
        passed on to _handle_next().

        :raises MalformedHeader: If the opening header is never
            completed, or does not match the expected one.
        """
        from common import decode, get_header, header_str
        expected = get_header()
        in_header = False
        for raw in self._next():
            if in_header:
                # not all mailers will keep trailing whitespace
                if raw == '#\n':
                    raw = '# \n'
                well_formed = raw[:2] == '# ' and raw[-1:] == '\n'
                if not well_formed or decode(raw[2:-1]) != expected[0]:
                    raise MalformedHeader('Found a header, but it'
                        ' was improperly formatted')
                expected.pop(0) # We read this line.
                if not expected:
                    break # We found everything.
                continue

            if raw[:1] != '#' or raw[-1:] != '\n':
                continue
            text = decode(raw[1:-1].strip())
            if text[:len(header_str)] != header_str:
                continue
            if text != expected[0]:
                raise MalformedHeader('Found what looks like'
                        ' a header, but did not match')
            in_header = True
            expected.pop(0)
        else:
            # The lines ran out before the whole header was seen
            raise MalformedHeader('Did not find an opening header')

        for raw in self._next():
            # The bzr header is terminated with a blank line
            # which does not start with '#'
            if raw == '\n':
                break
            self._handle_next(raw)
378
379
    def _read_next_entry(self, line, indent=1):
        """Split one '#' prefixed line into a key and a value.

        :param line: The raw line. Its first character has to be '#',
            and its last character is dropped.
        :param indent: The number of leading spaces to strip from the
            text after the '#'.
        :return: (key, value) with the spaces in key replaced by
            underscores, or (None, None) for a blank line. When
            nothing follows the colon, value is the list returned by
            _read_many().
        :raises MalformedHeader: If the line does not start with '#'
            or does not contain a colon in the expected place.
        """
        from common import decode
        if line[:1] != '#':
            raise MalformedHeader('Bzr header did not start with #')
        text = decode(line[1:-1]) # Drop the '#' and the '\n'
        padding = ' ' * indent
        if text[:indent] == padding:
            text = text[indent:]
        if not text:
            return None, None # Blank lines are ignored

        split_at = text.find(': ')
        if split_at == -1:
            # No 'key: value', so only a bare 'key:' is acceptable
            if text[-1:] != ':':
                raise MalformedHeader('While looking for key: value pairs,'
                        ' did not find the colon %r' % (text))
            key = text[:-1]
            value = self._read_many(indent=indent+3)
        else:
            key = text[:split_at]
            value = text[split_at+2:]
            if not value:
                value = self._read_many(indent=indent+3)

        #mutter('found %s: %s' % (key, value))
        return key.replace(' ', '_'), value
407
408
    def _handle_next(self, line):
        """Parse one header line and store its value.

        A 'revision' entry is handed to _read_revision(). Any other
        key has to name an attribute of self.info which is still None,
        and the value is stored in that attribute.

        :param line: The raw header line.
        :raises MalformedHeader: If the key is not an attribute of
            self.info, or the attribute already holds a value.
        """
        key, value = self._read_next_entry(line, indent=1)
        if key is None:
            # A blank line, nothing to store
            return

        if key == 'revision':
            self._read_revision(value)
            return

        if not hasattr(self.info, key):
            # What do we do with a key we don't recognize
            raise MalformedHeader('Unknown Key: %s' % key)
        if getattr(self.info, key) is not None:
            raise MalformedHeader('Duplicated Key: %s' % key)
        setattr(self.info, key, value)
423
        
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
424
    def _read_many(self, indent):
        """Read the values which follow an entry that ended with no
        value of its own.

        The list ends at the first upcoming line which does not start
        properly indented, that is with '#' followed by indent spaces.

        :param indent: The number of spaces expected after the '#'.
        :return: The list of decoded values, with the prefix and the
            last character of every line removed. The list is empty
            when the upcoming line is not indented properly.
        """
        from common import decode
        prefix = '#' + indent * ' '
        width = len(prefix)

        def more_values():
            # Peek at the line held back by _next()
            upcoming = self._next_line
            return upcoming is not None and upcoming[:width] == prefix

        collected = []
        if not more_values():
            return collected

        for raw in self._next():
            collected.append(decode(raw[width:-1]))
            if not more_values():
                break
        return collected
443
444
    def _read_one_patch(self):
        """Read in one patch and report whether another one follows.

        The first line of a patch must be a '***' meta line; its text
        (from the fifth character up to the last one) is decoded and
        becomes the action. Reading stops once the upcoming line is
        another '***' line, a '#' line, or missing.

        :return: action, lines, do_continue
            ``action`` is None when there is no patch to read at all.
        :raises MalformedPatches: if the first line does not start
            with '***'.
        """
        from common import decode
        #mutter('_read_one_patch: %r' % self._next_line)
        # Peek and see if there are no patches
        upcoming = self._next_line
        if upcoming is None or upcoming[:1] == '#':
            return None, [], False

        lines = []
        at_header = True
        for line in self._next():
            if at_header:
                if line[:3] != '***':
                    raise MalformedPatches('The first line of all patches'
                        ' should be a bzr meta line "***"'
                        ': %r' % line)
                action = decode(line[4:-1])

            # NOTE(review): the termination test runs before the current
            # line is stored, so the line directly in front of a
            # terminator is never added to ``lines`` -- presumably a
            # separator line; confirm against the writer.
            upcoming = self._next_line
            if upcoming is not None and upcoming[:3] == '***':
                return action, lines, True
            if upcoming is None or upcoming[:1] == '#':
                return action, lines, False

            if at_header:
                at_header = False
            else:
                lines.append(line)

        return action, lines, False
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
476
            
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
477
    def _read_patches(self):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
478
        do_continue = True
479
        while do_continue:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
480
            action, lines, do_continue = self._read_one_patch()
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
481
            if action is not None:
482
                self.info.actions.append((action, lines))
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
483
484
    def _read_revision(self, rev_id):
        """Revision entries have extra information associated.

        Following lines are parsed with ``_read_next_entry`` (indent=4)
        and stored as attributes of a new ``RevisionInfo``. Reading stops
        when the upcoming line is missing or does not start with
        '#    '. The finished record is appended to
        ``self.info.revisions``.

        :param rev_id: Passed to the ``RevisionInfo`` constructor.
        :raises MalformedHeader: if a key is not an attribute of the
            record, or the attribute has already been set.
        """
        rev_info = RevisionInfo(rev_id)
        prefix = '#    '
        for line in self._next():
            key, value = self._read_next_entry(line, indent=4)
            # NOTE(review): unlike _handle_next, a None key is not
            # filtered out here before it reaches hasattr().
            if not hasattr(rev_info, key):
                # What do we do with a key we don't recognize
                raise MalformedHeader('Unknown Key: %s' % key)
            if getattr(rev_info, key) is not None:
                raise MalformedHeader('Duplicated Key: %s' % key)
            setattr(rev_info, key, value)

            upcoming = self._next_line
            if upcoming is None or upcoming[:len(prefix)] != prefix:
                break

        self.info.revisions.append(rev_info)
506
507
    def _read_footer(self):
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
508
        """Read the rest of the meta information.
509
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
510
        :param first_line:  The previous step iterates past what it
511
                            can handle. That extra line is given here.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
512
        """
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
513
        for line in self._next():
514
            self._handle_next(line)
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
515
            if self._next_line is None or self._next_line[:1] != '#':
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
516
                break
517
518
    def _update_tree(self, tree):
        """This fills out a ChangesetTree based on the information
        that was read in.

        Each item of ``self.info.actions`` is an ``(action_line, lines)``
        pair. An action line looks like ``<action> <kind> <extra>``:
        action is one of renamed/removed/added/modified, kind is 'file'
        or 'directory', and extra is a ' // ' separated list of fields
        whose meaning depends on the action. ``lines`` holds the patch
        text belonging to the action.

        :param tree: A ChangesetTree to update with the new information.
        :raises BzrError: if an action line cannot be interpreted.
        """
        from common import decode, guess_text_id

        def get_text_id(info, file_id, kind):
            """Determine the text id for file_id and note it on the tree.

            :param info: The 'text-id:...' field of the action line, or
                None when the line did not carry one.
            """
            if info is not None:
                if info[:8] != 'text-id:':
                    raise BzrError("Text ids should be prefixed with 'text-id:'"
                        ': %r' % info)
                text_id = decode(info[8:])
            elif file_id in tree._text_ids:
                # Already recorded; no need to note it again.
                return tree._text_ids[file_id]
            else:
                # If text_id was not explicitly supplied
                # then it should be whatever we would guess it to be
                # based on the base revision, and what we know about
                # the target revision
                text_id = guess_text_id(tree.base_tree,
                        file_id, self.info.base, kind, modified=True)
            tree.note_text_id(file_id, text_id)
            return text_id

        def renamed(kind, extra, lines):
            # extra: old_path // [=> ]new_path [// text-id:...]
            info = extra.split(' // ')
            if len(info) < 2:
                raise BzrError('renamed action lines need both a from and to'
                        ': %r' % extra)
            old_path = info[0]
            if info[1][:3] == '=> ':
                new_path = info[1][3:]
            else:
                new_path = info[1]

            # The id is looked up before the rename is noted, while
            # old_path still resolves in the tree.
            file_id = tree.path2id(old_path)
            if len(info) > 2:
                text_id = get_text_id(info[2], file_id, kind)
            else:
                text_id = get_text_id(None, file_id, kind)
            # Trace to the log instead of printing to stdout.
            mutter('renamed %r => %r (file_id %r, text_id %r)'
                   % (old_path, new_path, file_id, text_id))
            tree.note_rename(old_path, new_path)
            if lines:
                tree.note_patch(new_path, ''.join(lines))

        def removed(kind, extra, lines):
            # extra: path
            info = extra.split(' // ')
            if len(info) > 1:
                # TODO: in the future we might allow file ids to be
                # given for removed entries
                raise BzrError('removed action lines should only have the path'
                        ': %r' % extra)
            path = info[0]
            tree.note_deletion(path)

        def added(kind, extra, lines):
            # extra: path // file-id:... [// text-id:...]
            info = extra.split(' // ')
            if len(info) <= 1:
                raise BzrError('add action lines require the path and file id'
                        ': %r' % extra)
            elif len(info) > 3:
                raise BzrError('add action lines have at most 3 entries'
                        ': %r' % extra)
            path = info[0]
            if info[1][:8] != 'file-id:':
                raise BzrError('The file-id should follow the path for an add'
                        ': %r' % extra)
            file_id = info[1][8:]

            tree.note_id(file_id, path, kind)
            if kind == 'directory':
                # Directories get neither a text id nor a patch.
                return
            # Called for its side effect of noting the text id.
            if len(info) > 2:
                get_text_id(info[2], file_id, kind)
            else:
                get_text_id(None, file_id, kind)
            tree.note_patch(path, ''.join(lines))

        def modified(kind, extra, lines):
            # extra: path [// text-id:...]
            info = extra.split(' // ')
            if len(info) < 1:
                # str.split() always yields at least one item, so this
                # is only a safety net.
                raise BzrError('modified action lines have at least'
                        ' the path in them: %r' % extra)
            path = info[0]

            file_id = tree.path2id(path)
            # Called for its side effect of noting the text id.
            if len(info) > 1:
                get_text_id(info[1], file_id, kind)
            else:
                get_text_id(None, file_id, kind)
            tree.note_patch(path, ''.join(lines))

        valid_actions = {
            'renamed':renamed,
            'removed':removed,
            'added':added,
            'modified':modified
        }
        for action_line, lines in self.info.actions:
            first = action_line.find(' ')
            if first == -1:
                raise BzrError('Bogus action line'
                        ' (no opening space): %r' % action_line)
            second = action_line.find(' ', first+1)
            if second == -1:
                raise BzrError('Bogus action line'
                        ' (missing second space): %r' % action_line)
            action = action_line[:first]
            kind = action_line[first+1:second]
            if kind not in ('file', 'directory'):
                raise BzrError('Bogus action line'
                        ' (invalid object kind %r): %r' % (kind, action_line))
            extra = action_line[second+1:]

            if action not in valid_actions:
                raise BzrError('Bogus action line'
                        ' (unrecognized action): %r' % action_line)
            valid_actions[action](kind, extra, lines)
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
640
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
641
def read_changeset(from_file, branch):
    """Read in a changeset from an iterable object (such as a file object)

    A ``ChangesetReader`` is built around ``from_file`` and asked for the
    changeset via ``get_changeset(branch)``.

    :param from_file: A file-like object to read the changeset information.
    :param branch: This will be used to build the changeset tree, it needs
                   to contain the base of the changeset. (Which you probably
                   won't know about until after the changeset is parsed.)
    :return: Whatever ``ChangesetReader.get_changeset`` returns.
    """
    return ChangesetReader(from_file).get_changeset(branch)
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
651
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
652
class ChangesetTree(Tree):
653
    def __init__(self, base_tree):
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
654
        self.base_tree = base_tree
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
655
        self._renamed = {} # Mapping from old_path => new_path
656
        self._renamed_r = {} # new_path => old_path
657
        self._new_id = {} # new_path => new_id
658
        self._new_id_r = {} # new_id => new_path
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
659
        self._kinds = {} # new_id => kind
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
660
        self._text_ids = {} # new_id => text_id
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
661
        self.patches = {}
0.5.48 by aaron.bentley at utoronto
Implemented deletion for ChangesetTrees
662
        self.deleted = []
0.5.52 by aaron.bentley at utoronto
Make contents-addressing configurable
663
        self.contents_by_id = True
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
664
        self._inventory = None
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
665
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
666
    def __str__(self):
667
        return pprint.pformat(self.__dict__)
668
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
669
    def note_rename(self, old_path, new_path):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
670
        """A file/directory has been renamed from old_path => new_path"""
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
671
        assert not self._renamed.has_key(old_path)
672
        assert not self._renamed_r.has_key(new_path)
673
        self._renamed[new_path] = old_path
674
        self._renamed_r[old_path] = new_path
675
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
676
    def note_id(self, new_id, new_path, kind='file'):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
677
        """Files that don't exist in base need a new id."""
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
678
        self._new_id[new_path] = new_id
679
        self._new_id_r[new_id] = new_path
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
680
        self._kinds[new_id] = kind
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
681
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
682
    def note_text_id(self, file_id, text_id):
683
        if (self._text_ids.has_key(file_id)
684
                and self._text_ids[file_id] != text_id):
685
            raise BzrError('Mismatched text_ids for file_id {%s}'
686
                    ': %s != %s' % (file_id,
687
                                    self._text_ids[file_id],
688
                                    text_id))
689
        self._text_ids[file_id] = text_id
690
0.5.44 by aaron.bentley at utoronto
Got get_file working for new files
691
    def note_patch(self, new_path, patch):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
692
        """There is a patch for a given filename."""
0.5.44 by aaron.bentley at utoronto
Got get_file working for new files
693
        self.patches[new_path] = patch
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
694
0.5.48 by aaron.bentley at utoronto
Implemented deletion for ChangesetTrees
695
    def note_deletion(self, old_path):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
696
        """The file at old_path has been deleted."""
0.5.48 by aaron.bentley at utoronto
Implemented deletion for ChangesetTrees
697
        self.deleted.append(old_path)
698
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
699
    def old_path(self, new_path):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
700
        """Get the old_path (path in the base_tree) for the file at new_path"""
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
701
        import os.path
0.6.1 by Aaron Bentley
Fleshed out MockTree, fixed all test failures
702
        assert new_path[:1] not in ('\\', '/')
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
703
        old_path = self._renamed.get(new_path)
704
        if old_path is not None:
705
            return old_path
706
        dirname,basename = os.path.split(new_path)
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
707
        # dirname is not '' doesn't work, because
708
        # dirname may be a unicode entry, and is
709
        # requires the objects to be identical
710
        if dirname != '':
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
711
            old_dir = self.old_path(dirname)
712
            if old_dir is None:
0.5.42 by aaron.bentley at utoronto
Improved rename handling
713
                old_path = None
714
            else:
715
                old_path = os.path.join(old_dir, basename)
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
716
        else:
0.5.42 by aaron.bentley at utoronto
Improved rename handling
717
            old_path = new_path
718
        #If the new path wasn't in renamed, the old one shouldn't be in
719
        #renamed_r
720
        if self._renamed_r.has_key(old_path):
721
            return None
722
        return old_path 
723
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
724
    def new_path(self, old_path):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
725
        """Get the new_path (path in the target_tree) for the file at old_path
726
        in the base tree.
727
        """
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
728
        import os.path
0.6.1 by Aaron Bentley
Fleshed out MockTree, fixed all test failures
729
        assert old_path[:1] not in ('\\', '/')
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
730
        new_path = self._renamed_r.get(old_path)
731
        if new_path is not None:
732
            return new_path
733
        if self._renamed.has_key(new_path):
734
            return None
735
        dirname,basename = os.path.split(old_path)
0.5.81 by John Arbash Meinel
Cleaning up from pychecker.
736
        if dirname != '':
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
737
            new_dir = self.new_path(dirname)
738
            if new_dir is None:
0.5.42 by aaron.bentley at utoronto
Improved rename handling
739
                new_path = None
740
            else:
741
                new_path = os.path.join(new_dir, basename)
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
742
        else:
0.5.42 by aaron.bentley at utoronto
Improved rename handling
743
            new_path = old_path
744
        #If the old path wasn't in renamed, the new one shouldn't be in
745
        #renamed_r
746
        if self._renamed.has_key(new_path):
747
            return None
748
        return new_path 
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
749
750
    def path2id(self, path):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
751
        """Return the id of the file present at path in the target tree."""
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
752
        file_id = self._new_id.get(path)
753
        if file_id is not None:
754
            return file_id
0.5.43 by aaron.bentley at utoronto
Handled moves and adds properly
755
        old_path = self.old_path(path)
756
        if old_path is None:
757
            return None
0.5.48 by aaron.bentley at utoronto
Implemented deletion for ChangesetTrees
758
        if old_path in self.deleted:
759
            return None
0.5.66 by John Arbash Meinel
Refactoring, moving test code into test (switching back to assert is None)
760
        if hasattr(self.base_tree, 'path2id'):
761
            return self.base_tree.path2id(old_path)
762
        else:
763
            return self.base_tree.inventory.path2id(old_path)
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
764
765
    def id2path(self, file_id):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
766
        """Return the new path in the target tree of the file with id file_id"""
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
767
        path = self._new_id_r.get(file_id)
768
        if path is not None:
769
            return path
0.5.43 by aaron.bentley at utoronto
Handled moves and adds properly
770
        old_path = self.base_tree.id2path(file_id)
771
        if old_path is None:
772
            return None
0.5.48 by aaron.bentley at utoronto
Implemented deletion for ChangesetTrees
773
        if old_path in self.deleted:
774
            return None
0.5.43 by aaron.bentley at utoronto
Handled moves and adds properly
775
        return self.new_path(old_path)
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
776
0.5.52 by aaron.bentley at utoronto
Make contents-addressing configurable
777
    def old_contents_id(self, file_id):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
778
        """Return the id in the base_tree for the given file_id,
779
        or None if the file did not exist in base.
780
781
        FIXME:  Something doesn't seem right here. It seems like this function
782
                should always either return None or file_id. Even if
783
                you are doing the by-path lookup, you are doing a
784
                id2path lookup, just to do the reverse path2id lookup.
0.6.1 by Aaron Bentley
Fleshed out MockTree, fixed all test failures
785
786
        Notice that you're doing the path2id on a different tree!
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
787
        """
0.5.52 by aaron.bentley at utoronto
Make contents-addressing configurable
788
        if self.contents_by_id:
789
            if self.base_tree.has_id(file_id):
790
                return file_id
791
            else:
792
                return None
793
        new_path = self.id2path(file_id)
794
        return self.base_tree.path2id(new_path)
795
        
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
796
    def get_file(self, file_id):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
797
        """Return a file-like object containing the new contents of the
798
        file given by file_id.
799
800
        TODO:   It might be nice if this actually generated an entry
801
                in the text-store, so that the file contents would
802
                then be cached.
803
        """
0.5.52 by aaron.bentley at utoronto
Make contents-addressing configurable
804
        base_id = self.old_contents_id(file_id)
0.5.50 by aaron.bentley at utoronto
Evaluate patches against file paths, not file ids
805
        if base_id is not None:
806
            patch_original = self.base_tree.get_file(base_id)
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
807
        else:
808
            patch_original = None
0.5.52 by aaron.bentley at utoronto
Make contents-addressing configurable
809
        file_patch = self.patches.get(self.id2path(file_id))
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
810
        if file_patch is None:
0.5.44 by aaron.bentley at utoronto
Got get_file working for new files
811
            return patch_original
0.5.94 by Aaron Bentley
Switched to native patch application, added tests for terminating newlines
812
813
        assert not file_patch.startswith('\\'), \
814
            'Malformed patch for %s, %r' % (file_id, file_patch)
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
815
        return patched_file(file_patch, patch_original)
816
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
817
    def get_kind(self, file_id):
818
        if file_id in self._kinds:
819
            return self._kinds[file_id]
820
        return self.base_tree.inventory[file_id].kind
821
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
822
    def get_text_id(self, file_id):
        """Return the text id for file_id, or None for directories.

        A text id stored in self._text_ids takes precedence; otherwise the
        text id of the matching entry in the base tree's inventory is used.
        """
        kind = self.get_kind(file_id)
        if kind == 'root_directory' or kind == 'directory':
            # Directories carry no text.
            return None

        known_text_ids = self._text_ids
        if file_id not in known_text_ids:
            return self.base_tree.inventory[file_id].text_id
        return known_text_ids[file_id]
828
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
829
    def get_size_and_sha1(self, file_id):
        """Return a (size, sha1) pair for the given file id.

        When no patch is recorded for the file's path, the values are
        taken from the base tree's inventory entry rather than re-reading
        the file. Otherwise the patched text is read in full and measured.
        Returns (None, None) when the file id has no path in this tree.
        """
        from bzrlib.osutils import sha_string

        new_path = self.id2path(file_id)
        if new_path is None:
            return None, None

        if new_path in self.patches:
            # The text changed, so it has to be rebuilt and hashed.
            text = self.get_file(file_id).read()
            return len(text), sha_string(text)

        # No patch for this path: the contents must be the same as in
        # the base_tree, so its inventory entry already has the answer.
        base_entry = self.base_tree.inventory[file_id]
        size = base_entry.text_size
        if size is not None:
            size = int(size)
        return size, base_entry.text_sha1
849
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
850
851
    def _get_inventory(self):
        """Build up the inventory for the ChangesetTree.

        A fresh Inventory is created, rooted at the base tree's root id
        when the Inventory constructor accepts one, and one entry is added
        for every (path, file_id) pair yielded by sorted_path_id(), except
        for the root itself. The pairs are visited in sorted path order.

        Raises BzrError when a non-directory entry ends up without a
        text size.
        """
        from os.path import dirname, basename
        from bzrlib.inventory import Inventory, InventoryEntry
        from bzrlib.trace import mutter

        assert self.base_tree is not None
        base_inv = self.base_tree.inventory
        root_id = base_inv.root.file_id
        try:
            # New inventories have a unique root_id
            inv = Inventory(root_id)
        except TypeError:
            inv = Inventory()

        def add_entry(file_id):
            # Add the inventory entry for file_id to inv; ids without a
            # path in this tree are skipped.
            path = self.id2path(file_id)
            if path is None:
                return
            parent_path = dirname(path)
            if parent_path == '':
                parent_id = root_id
            else:
                parent_id = self.path2id(parent_path)

            kind = self.get_kind(file_id)
            is_directory = (kind == 'directory')
            if is_directory:
                text_id = None
            else:
                text_id = self.get_text_id(file_id)

            name = basename(path)
            # Diagnostics go to the trace log; library code must not
            # write to stdout while building an inventory.
            mutter('%r %r %r %r' % (path, name, file_id, text_id))
            ie = InventoryEntry(file_id, name, kind, parent_id,
                                text_id=text_id)
            if is_directory:
                ie.text_size, ie.text_sha1 = None, None
            else:
                ie.text_size, ie.text_sha1 = self.get_size_and_sha1(file_id)
                if ie.text_size is None:
                    raise BzrError('Got a text_size of None for file_id %r'
                                   % file_id)
            inv.add(ie)

        for path, file_id in self.sorted_path_id():
            if file_id == inv.root.file_id:
                # The root entry already exists in the new inventory.
                continue
            add_entry(file_id)

        return inv
902
903
    # The inherited inventory property has to be redefined here.
    # According to the docs, a property() object does not go through
    # method overriding: it keeps using the function it was given at
    # the instant it was created, so the parent's property would only
    # ever call the parent's _get_inventory.
    inventory = property(_get_inventory)
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
909
0.5.49 by aaron.bentley at utoronto
Implemented iteration over ids
910
    def __iter__(self):
        """Yield the file id of every entry in this tree's inventory."""
        for _path, inv_entry in self.inventory.iter_entries():
            yield inv_entry.file_id
0.5.49 by aaron.bentley at utoronto
Implemented iteration over ids
913
0.6.1 by Aaron Bentley
Fleshed out MockTree, fixed all test failures
914
    def sorted_path_id(self):
        """Return a sorted list of pairs describing this tree's contents.

        The list holds every item of self._new_id (presumably
        path -> file id; verify against where _new_id is filled in),
        followed by a (path, file_id) pair for each id of the base tree
        that still has a path in this tree. The list is sorted before
        being returned.
        """
        pairs = list(self._new_id.iteritems())
        for file_id in self.base_tree:
            path = self.id2path(file_id)
            if path is not None:
                pairs.append((path, file_id))
        pairs.sort()
        return pairs
925
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
926
def patched_file(file_patch, original):
    """Produce a file-like object with the patched version of a text.

    file_patch is the text of the patch and original is the text it is
    applied to, passed through to iter_patched() unchanged. An empty
    patch yields an empty file.
    """
    from patches import iter_patched
    from iterablefile import IterableFile
    if file_patch == "":
        return IterableFile(())
    # Only '\n' terminates a patch line. splitlines(True) would also
    # break on a bare '\r' (and on further characters for unicode text),
    # cutting a line with an embedded carriage return into two bogus
    # patch lines.
    pieces = file_patch.split('\n')
    unterminated_tail = pieces.pop()
    patch_lines = [piece + '\n' for piece in pieces]
    if unterminated_tail:
        # Text after the last newline is a final line with no terminator.
        patch_lines.append(unterminated_tail)
    return IterableFile(iter_patched(original, patch_lines))
933