~bzr-pqm/bzr/bzr.dev

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
# Copyright (C) 2005, 2006 Canonical

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

from bzrlib.inventory import InventoryEntry
from bzrlib.trace import mutter


class TreeDelta(object):
    """Describes changes from one tree to another.

    Contains four lists:

    added
        (path, id, kind)
    removed
        (path, id, kind)
    renamed
        (oldpath, newpath, id, kind, text_modified, meta_modified)
    modified
        (path, id, kind, text_modified, meta_modified)
    unchanged
        (path, id, kind)

    Each id is listed only once.

    Files that are both modified and renamed are listed only in
    renamed, with the text_modified flag true. The text_modified
    applies either to the the content of the file or the target of the
    symbolic link, depending of the kind of file.

    Files are only considered renamed if their name has changed or
    their parent directory has changed.  Renaming a directory
    does not count as renaming all its contents.

    The lists are normally sorted when the delta is created.
    """
    def __init__(self):
        self.added = []
        self.removed = []
        self.renamed = []
        self.modified = []
        self.unchanged = []

    def __eq__(self, other):
        if not isinstance(other, TreeDelta):
            return False
        return self.added == other.added \
               and self.removed == other.removed \
               and self.renamed == other.renamed \
               and self.modified == other.modified \
               and self.unchanged == other.unchanged

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        return "TreeDelta(added=%r, removed=%r, renamed=%r, modified=%r," \
            " unchanged=%r)" % (self.added, self.removed, self.renamed,
            self.modified, self.unchanged)

    def has_changed(self):
        return bool(self.modified
                    or self.added
                    or self.removed
                    or self.renamed)

    def touches_file_id(self, file_id):
        """Return True if file_id is modified by this delta."""
        for l in self.added, self.removed, self.modified:
            for v in l:
                if v[1] == file_id:
                    return True
        for v in self.renamed:
            if v[2] == file_id:
                return True
        return False
            

    def show(self, to_file, show_ids=False, show_unchanged=False):
        """output this delta in status-like form to to_file."""
        def show_list(files):
            for item in files:
                path, fid, kind = item[:3]

                if kind == 'directory':
                    path += '/'
                elif kind == 'symlink':
                    path += '@'

                if len(item) == 5 and item[4]:
                    path += '*'

                if show_ids:
                    print >>to_file, '  %-30s %s' % (path, fid)
                else:
                    print >>to_file, ' ', path
            
        if self.removed:
            print >>to_file, 'removed:'
            show_list(self.removed)
                
        if self.added:
            print >>to_file, 'added:'
            show_list(self.added)

        extra_modified = []

        if self.renamed:
            print >>to_file, 'renamed:'
            for (oldpath, newpath, fid, kind,
                 text_modified, meta_modified) in self.renamed:
                if text_modified or meta_modified:
                    extra_modified.append((newpath, fid, kind,
                                           text_modified, meta_modified))
                if meta_modified:
                    newpath += '*'
                if show_ids:
                    print >>to_file, '  %s => %s %s' % (oldpath, newpath, fid)
                else:
                    print >>to_file, '  %s => %s' % (oldpath, newpath)
                    
        if self.modified or extra_modified:
            print >>to_file, 'modified:'
            show_list(self.modified)
            show_list(extra_modified)
            
        if show_unchanged and self.unchanged:
            print >>to_file, 'unchanged:'
            show_list(self.unchanged)


def compare_trees(old_tree, new_tree, want_unchanged=False, specific_files=None):
    """Describe changes from one tree to another.

    Returns a TreeDelta with details of added, modified, renamed, and
    deleted entries.

    The root entry is specifically exempt.

    This only considers versioned files.

    want_unchanged
        If true, also list files unchanged from one version to
        the next.

    specific_files
        If true, only check for changes to specified names or
        files within them.  Any unversioned files given have no effect
        (but this might change in the future).
    """
    # NB: show_status depends on being able to pass in non-versioned files and
    # report them as unknown
    old_tree.lock_read()
    try:
        new_tree.lock_read()
        try:
            return _compare_trees(old_tree, new_tree, want_unchanged,
                                  specific_files)
        finally:
            new_tree.unlock()
    finally:
        old_tree.unlock()


def _compare_trees(old_tree, new_tree, want_unchanged, specific_files):

    from osutils import is_inside_any
    
    old_inv = old_tree.inventory
    new_inv = new_tree.inventory
    delta = TreeDelta()
    mutter('start compare_trees')

    # TODO: Rather than iterating over the whole tree and then filtering, we
    # could diff just the specified files (if any) and their subtrees.  
    # Perhaps should take a list of file-ids instead?   Need to indicate any
    # ids or names which were not found in the trees.

    old_files = old_tree.list_files()
    new_files = new_tree.list_files()

    more_old = True
    more_new = True

    added = {}
    removed = {}

    def get_next(iter):
        try:
            return iter.next()
        except StopIteration:
            return None, None, None, None, None
    old_path, old_class, old_kind, old_file_id, old_entry = get_next(old_files)
    new_path, new_class, new_kind, new_file_id, new_entry = get_next(new_files)


    def check_matching(old_path, old_entry, new_path, new_entry):
        """We have matched up 2 file_ids, check for changes."""
        assert old_entry.kind == new_entry.kind

        if old_entry.kind == 'root_directory':
            return

        if specific_files:
            if (not is_inside_any(specific_files, old_path)
                and not is_inside_any(specific_files, new_path)):
                return

        # temporary hack until all entries are populated before clients 
        # get them
        old_entry._read_tree_state(old_path, old_tree)
        new_entry._read_tree_state(new_path, new_tree)
        text_modified, meta_modified = new_entry.detect_changes(old_entry)
        
        # If the name changes, or the parent_id changes, we have a rename
        # (if we move a parent, that doesn't count as a rename for the file)
        if (old_entry.name != new_entry.name 
            or old_entry.parent_id != new_entry.parent_id):
            delta.renamed.append((old_path,
                                  new_path,
                                  old_entry.file_id, old_entry.kind,
                                  text_modified, meta_modified))
        elif text_modified or meta_modified:
            delta.modified.append((new_path, new_entry.file_id, new_entry.kind,
                                   text_modified, meta_modified))
        elif want_unchanged:
            delta.unchanged.append((new_path, new_entry.file_id, new_entry.kind))


    def handle_old(path, entry):
        """old entry without a new entry match

        Check to see if a matching new entry was already seen as an
        added file, and switch the pair into being a rename.
        Otherwise just mark the old entry being removed.
        """
        if entry.file_id in added:
            # Actually this is a rename, we found a new file_id earlier
            # at a different location, so it is no-longer added
            x_new_path, x_new_entry = added.pop(entry.file_id)
            check_matching(path, entry, x_new_path, x_new_entry)
        else:
            # We have an old_file_id which doesn't line up with a new_file_id
            # So this file looks to be removed
            assert entry.file_id not in removed
            removed[entry.file_id] = path, entry

    def handle_new(path, entry):
        """new entry without an old entry match
        
        Check to see if a matching old entry was already seen as a
        removal, and change the pair into a rename.
        Otherwise just mark the new entry as an added file.
        """
        if entry.file_id in removed:
            # We saw this file_id earlier at an old different location
            # it is no longer removed, just renamed
            x_old_path, x_old_entry = removed.pop(entry.file_id)
            check_matching(x_old_path, x_old_entry, path, entry)
        else:
            # We have a new file which does not match an old file
            # mark it as added
            assert entry.file_id not in added
            added[entry.file_id] = path, entry

    while old_path or new_path:
        # list_files() returns files in alphabetical path sorted order
        if old_path == new_path:
            if old_file_id == new_file_id:
                # This is the common case, the files are in the same place
                # check if there were any content changes

                if old_file_id is None:
                    # We have 2 unversioned files, no deltas possible???
                    pass
                else:
                    check_matching(old_path, old_entry, new_path, new_entry)
            else:
                # The ids don't match, so we have to handle them both
                # separately.
                if old_file_id is not None:
                    handle_old(old_path, old_entry)

                if new_file_id is not None:
                    handle_new(new_path, new_entry)

            # The two entries were at the same path, so increment both sides
            old_path, old_class, old_kind, old_file_id, old_entry = get_next(old_files)
            new_path, new_class, new_kind, new_file_id, new_entry = get_next(new_files)
        elif new_path is None or (old_path is not None and old_path < new_path):
            # Assume we don't match, only process old_path
            if old_file_id is not None:
                handle_old(old_path, old_entry)
            # old_path came first, so increment it, trying to match up
            old_path, old_class, old_kind, old_file_id, old_entry = get_next(old_files)
        elif new_path is not None:
            # new_path came first, so increment it, trying to match up
            if new_file_id is not None:
                handle_new(new_path, new_entry)
            new_path, new_class, new_kind, new_file_id, new_entry = get_next(new_files)

    # Now we have a set of added and removed files, mark them all
    for old_path, old_entry in removed.itervalues():
        if specific_files:
            if not is_inside_any(specific_files, old_path):
                continue
        delta.removed.append((old_path, old_entry.file_id, old_entry.kind))
    for new_path, new_entry in added.itervalues():
        if specific_files:
            if not is_inside_any(specific_files, new_path):
                continue
        delta.added.append((new_path, new_entry.file_id, new_entry.kind))

    delta.removed.sort()
    delta.added.sort()
    delta.renamed.sort()
    # TODO: jam 20060529 These lists shouldn't need to be sorted
    #       since we added them in alphabetical order.
    delta.modified.sort()
    delta.unchanged.sort()

    return delta