~bzr-pqm/bzr/bzr.dev

3193.8.13 by Aaron Bentley
Update texts
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3193.8.32 by Aaron Bentley
Update GPL preamble
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3193.8.13 by Aaron Bentley
Update texts
16
6379.6.3 by Jelmer Vernooij
Use absolute_import.
17
from __future__ import absolute_import
3193.8.13 by Aaron Bentley
Update texts
18
3193.8.4 by Aaron Bentley
Get rename detection working for files.
19
from cStringIO import StringIO
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
20
21
from bzrlib import (
22
    osutils,
23
    progress,
3193.8.33 by Aaron Bentley
Add output, emit minimal inventory delta.
24
    trace,
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
25
)
3193.8.16 by Aaron Bentley
Get a dict of required parents.
26
from bzrlib.ui import ui_factory
6138.3.4 by Jonathan Riddell
add gettext() to uses of trace.note()
27
from bzrlib.i18n import gettext
3193.8.4 by Aaron Bentley
Get rename detection working for files.
28
29
class RenameMap(object):
3193.8.13 by Aaron Bentley
Update texts
30
    """Determine a mapping of renames."""
3193.8.4 by Aaron Bentley
Get rename detection working for files.
31
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
32
    def __init__(self, tree):
33
        self.tree = tree
3193.8.4 by Aaron Bentley
Get rename detection working for files.
34
        self.edge_hashes = {}
35
3193.8.11 by Aaron Bentley
Make hash iterator static
36
    @staticmethod
37
    def iter_edge_hashes(lines):
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
38
        """Iterate through the hashes of line pairs (which make up an edge).
39
40
        The hash is truncated using a modulus to avoid excessive memory
41
        consumption by the hitscount dict.  A modulus of 10Mi means that the
42
        maximum number of keys is 10Mi.  (Keys are normally 32 bits, e.g.
43
        4 Gi)
44
        """
3193.8.10 by Aaron Bentley
Update to weight hits and use 10M of keyspace
45
        modulus = 1024 * 1024 * 10
3193.8.4 by Aaron Bentley
Get rename detection working for files.
46
        for n in range(len(lines)):
3193.8.10 by Aaron Bentley
Update to weight hits and use 10M of keyspace
47
            yield hash(tuple(lines[n:n+2])) % modulus
3193.8.4 by Aaron Bentley
Get rename detection working for files.
48
49
    def add_edge_hashes(self, lines, tag):
3193.8.13 by Aaron Bentley
Update texts
50
        """Update edge_hashes to include the given lines.
51
52
        :param lines: The lines to update the hashes for.
53
        :param tag: A tag uniquely associated with these lines (i.e. file-id)
54
        """
3193.8.4 by Aaron Bentley
Get rename detection working for files.
55
        for my_hash in self.iter_edge_hashes(lines):
56
            self.edge_hashes.setdefault(my_hash, set()).add(tag)
57
58
    def add_file_edge_hashes(self, tree, file_ids):
3193.8.13 by Aaron Bentley
Update texts
59
        """Update to reflect the hashes for files in the tree.
60
61
        :param tree: The tree containing the files.
62
        :param file_ids: A list of file_ids to perform the updates for.
63
        """
3193.8.4 by Aaron Bentley
Get rename detection working for files.
64
        desired_files = [(f, f) for f in file_ids]
3193.8.14 by Aaron Bentley
Add progress reporting to guess-renames
65
        task = ui_factory.nested_progress_bar()
66
        try:
67
            for num, (file_id, contents) in enumerate(
68
                tree.iter_files_bytes(desired_files)):
6138.4.1 by Jonathan Riddell
add gettext to progress bar strings
69
                task.update(gettext('Calculating hashes'), num, len(file_ids))
3193.8.14 by Aaron Bentley
Add progress reporting to guess-renames
70
                s = StringIO()
71
                s.writelines(contents)
72
                s.seek(0)
73
                self.add_edge_hashes(s.readlines(), file_id)
74
        finally:
75
            task.finished()
3193.8.4 by Aaron Bentley
Get rename detection working for files.
76
77
    def hitcounts(self, lines):
3193.8.13 by Aaron Bentley
Update texts
78
        """Count the number of hash hits for each tag, for the given lines.
79
80
        Hits are weighted according to the number of tags the hash is
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
81
        associated with; more tags means that the hash is less rare and should
82
        tend to be ignored.
3193.8.13 by Aaron Bentley
Update texts
83
        :param lines: The lines to calculate hashes of.
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
84
        :return: a dict of {tag: hitcount}
3193.8.13 by Aaron Bentley
Update texts
85
        """
3193.8.4 by Aaron Bentley
Get rename detection working for files.
86
        hits = {}
87
        for my_hash in self.iter_edge_hashes(lines):
88
            tags = self.edge_hashes.get(my_hash)
89
            if tags is None:
90
                continue
3193.8.12 by Aaron Bentley
Reorganize slightly for the benefit of kcachegrind
91
            taglen = len(tags)
3193.8.4 by Aaron Bentley
Get rename detection working for files.
92
            for tag in tags:
93
                if tag not in hits:
94
                    hits[tag] = 0
3193.8.12 by Aaron Bentley
Reorganize slightly for the benefit of kcachegrind
95
                hits[tag] += 1.0 / taglen
3193.8.4 by Aaron Bentley
Get rename detection working for files.
96
        return hits
97
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
98
    def get_all_hits(self, paths):
99
        """Find all the hit counts for the listed paths in the tree.
3193.8.13 by Aaron Bentley
Update texts
100
101
        :return: A list of tuples of count, path, file_id.
102
        """
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
103
        all_hits = []
3193.8.14 by Aaron Bentley
Add progress reporting to guess-renames
104
        task = ui_factory.nested_progress_bar()
105
        try:
106
            for num, path in enumerate(paths):
6138.4.1 by Jonathan Riddell
add gettext to progress bar strings
107
                task.update(gettext('Determining hash hits'), num, len(paths))
3193.8.26 by Aaron Bentley
Updates from review.
108
                hits = self.hitcounts(self.tree.get_file_lines(None,
109
                                                               path=path))
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
110
                all_hits.extend((v, path, k) for k, v in hits.items())
3193.8.14 by Aaron Bentley
Add progress reporting to guess-renames
111
        finally:
112
            task.finished()
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
113
        return all_hits
3193.8.12 by Aaron Bentley
Reorganize slightly for the benefit of kcachegrind
114
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
115
    def file_match(self, paths):
3193.8.13 by Aaron Bentley
Update texts
116
        """Return a mapping from file_ids to the supplied paths."""
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
117
        return self._match_hits(self.get_all_hits(paths))
3193.8.17 by Aaron Bentley
Get directory rename handling working.
118
119
    @staticmethod
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
120
    def _match_hits(hit_list):
3193.8.26 by Aaron Bentley
Updates from review.
121
        """Using a hit list, determine a path-to-fileid map.
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
122
123
        The hit list is a list of (count, path, file_id), where count is a
124
        (possibly float) number, with higher numbers indicating stronger
125
        matches.
126
        """
3193.8.12 by Aaron Bentley
Reorganize slightly for the benefit of kcachegrind
127
        seen_file_ids = set()
128
        path_map = {}
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
129
        for count, path, file_id in sorted(hit_list, reverse=True):
3193.8.26 by Aaron Bentley
Updates from review.
130
            if path in path_map or file_id in seen_file_ids:
3193.8.7 by Aaron Bentley
Saner algorithm for picking optimal file.
131
                continue
132
            path_map[path] = file_id
133
            seen_file_ids.add(file_id)
3193.8.4 by Aaron Bentley
Get rename detection working for files.
134
        return path_map
3193.8.16 by Aaron Bentley
Get a dict of required parents.
135
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
136
    def get_required_parents(self, matches):
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
137
        """Return a dict of all file parents that must be versioned.
138
139
        The keys are the required parents and the values are sets of their
140
        children.
141
        """
3193.8.16 by Aaron Bentley
Get a dict of required parents.
142
        required_parents = {}
143
        for path in matches:
144
            while True:
145
                child = path
146
                path = osutils.dirname(path)
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
147
                if self.tree.path2id(path) is not None:
3193.8.16 by Aaron Bentley
Get a dict of required parents.
148
                    break
149
                required_parents.setdefault(path, []).append(child)
3193.8.17 by Aaron Bentley
Get directory rename handling working.
150
        require_ids = {}
151
        for parent, children in required_parents.iteritems():
152
            child_file_ids = set()
153
            for child in children:
154
                file_id = matches.get(child)
155
                if file_id is not None:
156
                    child_file_ids.add(file_id)
157
            require_ids[parent] = child_file_ids
158
        return require_ids
159
160
    def match_parents(self, required_parents, missing_parents):
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
161
        """Map parent directories to file-ids.
162
163
        This is done by finding similarity between the file-ids of children of
164
        required parent directories and the file-ids of children of missing
165
        parent directories.
166
        """
167
        all_hits = []
3193.8.17 by Aaron Bentley
Get directory rename handling working.
168
        for file_id, file_id_children in missing_parents.iteritems():
169
            for path, path_children in required_parents.iteritems():
170
                hits = len(path_children.intersection(file_id_children))
171
                if hits > 0:
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
172
                    all_hits.append((hits, path, file_id))
173
        return self._match_hits(all_hits)
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
174
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
175
    def _find_missing_files(self, basis):
3193.8.22 by Aaron Bentley
Reduce unnecessary locking.
176
        missing_files = set()
177
        missing_parents = {}
178
        candidate_files = set()
3193.8.25 by Aaron Bentley
Improve progress reporting.
179
        task = ui_factory.nested_progress_bar()
180
        iterator = self.tree.iter_changes(basis, want_unversioned=True,
181
                                          pb=task)
182
        try:
183
            for (file_id, paths, changed_content, versioned, parent, name,
184
                 kind, executable) in iterator:
185
                if kind[1] is None and versioned[1]:
186
                    missing_parents.setdefault(parent[0], set()).add(file_id)
187
                    if kind[0] == 'file':
188
                        missing_files.add(file_id)
189
                    else:
190
                        #other kinds are not handled
191
                        pass
192
                if versioned == (False, False):
193
                    if self.tree.is_ignored(paths[1]):
194
                        continue
195
                    if kind[1] == 'file':
196
                        candidate_files.add(paths[1])
197
                    if kind[1] == 'directory':
198
                        for _dir, children in self.tree.walkdirs(paths[1]):
199
                            for child in children:
200
                                if child[2] == 'file':
201
                                    candidate_files.add(child[0])
202
        finally:
203
            task.finished()
3193.8.22 by Aaron Bentley
Reduce unnecessary locking.
204
        return missing_files, missing_parents, candidate_files
205
206
    @classmethod
3193.8.33 by Aaron Bentley
Add output, emit minimal inventory delta.
207
    def guess_renames(klass, tree, dry_run=False):
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
208
        """Guess which files to rename, and perform the rename.
209
210
        We assume that unversioned files and missing files indicate that
211
        versioned files have been renamed outside of Bazaar.
3193.8.26 by Aaron Bentley
Updates from review.
212
213
        :param tree: A write-locked working tree.
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
214
        """
3193.8.22 by Aaron Bentley
Reduce unnecessary locking.
215
        required_parents = {}
3193.8.25 by Aaron Bentley
Improve progress reporting.
216
        task = ui_factory.nested_progress_bar()
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
217
        try:
3193.8.25 by Aaron Bentley
Improve progress reporting.
218
            pp = progress.ProgressPhase('Guessing renames', 4, task)
219
            basis = tree.basis_tree()
220
            basis.lock_read()
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
221
            try:
3193.8.25 by Aaron Bentley
Improve progress reporting.
222
                rn = klass(tree)
223
                pp.next_phase()
224
                missing_files, missing_parents, candidate_files = (
225
                    rn._find_missing_files(basis))
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
226
                pp.next_phase()
227
                rn.add_file_edge_hashes(basis, missing_files)
228
            finally:
3193.8.25 by Aaron Bentley
Improve progress reporting.
229
                basis.unlock()
230
            pp.next_phase()
231
            matches = rn.file_match(candidate_files)
232
            parents_matches = matches
233
            while len(parents_matches) > 0:
234
                required_parents = rn.get_required_parents(
235
                    parents_matches)
236
                parents_matches = rn.match_parents(required_parents,
237
                                                   missing_parents)
238
                matches.update(parents_matches)
239
            pp.next_phase()
3193.8.33 by Aaron Bentley
Add output, emit minimal inventory delta.
240
            delta = rn._make_inventory_delta(matches)
241
            for old, new, file_id, entry in delta:
6138.3.4 by Jonathan Riddell
add gettext() to uses of trace.note()
242
                trace.note( gettext("{0} => {1}").format(old, new) )
3193.8.33 by Aaron Bentley
Add output, emit minimal inventory delta.
243
            if not dry_run:
244
                tree.add(required_parents)
245
                tree.apply_inventory_delta(delta)
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
246
        finally:
3193.8.25 by Aaron Bentley
Improve progress reporting.
247
            task.finished()
3193.8.23 by Aaron Bentley
Split up guess_renames further.
248
3193.8.33 by Aaron Bentley
Add output, emit minimal inventory delta.
249
    def _make_inventory_delta(self, matches):
3193.8.27 by Aaron Bentley
Use apply_inventory_delta to rename files.
250
        delta = []
251
        file_id_matches = dict((f, p) for p, f in matches.items())
252
        for old_path, entry in self.tree.iter_entries_by_dir(matches.values()):
253
            new_path = file_id_matches[entry.file_id]
3193.8.29 by Aaron Bentley
Use split instead of basename/dirname
254
            parent_path, new_name = osutils.split(new_path)
3193.8.27 by Aaron Bentley
Use apply_inventory_delta to rename files.
255
            parent_id = matches.get(parent_path)
256
            if parent_id is None:
257
                parent_id = self.tree.path2id(parent_path)
3193.8.33 by Aaron Bentley
Add output, emit minimal inventory delta.
258
            if entry.name == new_name and entry.parent_id == parent_id:
259
                continue
3193.8.27 by Aaron Bentley
Use apply_inventory_delta to rename files.
260
            new_entry = entry.copy()
261
            new_entry.parent_id = parent_id
3193.8.29 by Aaron Bentley
Use split instead of basename/dirname
262
            new_entry.name = new_name
3193.8.27 by Aaron Bentley
Use apply_inventory_delta to rename files.
263
            delta.append((old_path, new_path, new_entry.file_id, new_entry))
3193.8.33 by Aaron Bentley
Add output, emit minimal inventory delta.
264
        return delta