~bzr-pqm/bzr/bzr.dev

2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Python implementations of Dirstate Helper functions."""
18
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
19
import os
20
2474.1.65 by John Arbash Meinel
Found an import dependency bug if the compiled version is not available.
21
# We cannot import the dirstate module, because it loads this module
22
# All we really need is the IN_MEMORY_MODIFIED constant
23
from bzrlib.dirstate import DirState
2474.1.63 by John Arbash Meinel
Found a small bug in the python version of _read_dirblocks.
24
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
25
2474.1.66 by John Arbash Meinel
Some restructuring.
26
def _bisect_path_left_py(paths, path):
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
27
    """Return the index where to insert path into paths.
28
29
    This uses the dirblock sorting. So all children in a directory come before
30
    the children of children. For example::
31
32
        a/
33
          b/
34
            c
35
          d/
36
            e
37
          b-c
38
          d-e
39
        a-a
40
        a=c
41
42
    Will be sorted as::
43
44
        a
45
        a-a
46
        a=c
47
        a/b
48
        a/b-c
49
        a/d
50
        a/d-e
51
        a/b/c
52
        a/d/e
53
54
    :param paths: A list of paths to search through
55
    :param path: A single path to insert
56
    :return: An offset where 'path' can be inserted.
57
    :seealso: bisect.bisect_left
58
    """
59
    hi = len(paths)
60
    lo = 0
61
    while lo < hi:
62
        mid = (lo + hi) // 2
63
        # Grab the dirname for the current dirblock
64
        cur = paths[mid]
2474.1.66 by John Arbash Meinel
Some restructuring.
65
        if _cmp_path_by_dirblock_py(cur, path) < 0:
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
66
            lo = mid + 1
67
        else:
68
            hi = mid
69
    return lo
70
71
2474.1.66 by John Arbash Meinel
Some restructuring.
72
def _bisect_path_right_py(paths, path):
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
73
    """Return the index where to insert path into paths.
74
75
    This uses a path-wise comparison so we get::
76
        a
77
        a-b
78
        a=b
79
        a/b
80
    Rather than::
81
        a
82
        a-b
83
        a/b
84
        a=b
85
    :param paths: A list of paths to search through
86
    :param path: A single path to insert
87
    :return: An offset where 'path' can be inserted.
88
    :seealso: bisect.bisect_right
89
    """
90
    hi = len(paths)
91
    lo = 0
92
    while lo < hi:
93
        mid = (lo+hi)//2
94
        # Grab the dirname for the current dirblock
95
        cur = paths[mid]
2474.1.66 by John Arbash Meinel
Some restructuring.
96
        if _cmp_path_by_dirblock_py(path, cur) < 0:
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
97
            hi = mid
98
        else:
99
            lo = mid + 1
100
    return lo
101
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
102
103
def bisect_dirblock_py(dirblocks, dirname, lo=0, hi=None, cache={}):
104
    """Return the index where to insert dirname into the dirblocks.
105
106
    The return value idx is such that all directories blocks in dirblock[:idx]
107
    have names < dirname, and all blocks in dirblock[idx:] have names >=
108
    dirname.
109
110
    Optional args lo (default 0) and hi (default len(dirblocks)) bound the
111
    slice of a to be searched.
112
    """
113
    if hi is None:
114
        hi = len(dirblocks)
115
    try:
116
        dirname_split = cache[dirname]
117
    except KeyError:
118
        dirname_split = dirname.split('/')
119
        cache[dirname] = dirname_split
120
    while lo < hi:
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
121
        mid = (lo + hi) // 2
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
122
        # Grab the dirname for the current dirblock
123
        cur = dirblocks[mid][0]
124
        try:
125
            cur_split = cache[cur]
126
        except KeyError:
127
            cur_split = cur.split('/')
128
            cache[cur] = cur_split
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
129
        if cur_split < dirname_split: lo = mid + 1
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
130
        else: hi = mid
131
    return lo
132
133
2474.1.66 by John Arbash Meinel
Some restructuring.
134
def cmp_by_dirs_py(path1, path2):
135
    """Compare two paths directory by directory.
136
137
    This is equivalent to doing::
138
139
       cmp(path1.split('/'), path2.split('/'))
140
141
    The idea is that you should compare path components separately. This
142
    differs from plain ``cmp(path1, path2)`` for paths like ``'a-b'`` and
143
    ``a/b``. "a-b" comes after "a" but would come before "a/b" lexically.
144
145
    :param path1: first path
146
    :param path2: second path
2872.4.10 by Martin Pool
docstrings for cmp_ functions seem to be backwards
147
    :return: negative number if ``path1`` comes first,
2474.1.66 by John Arbash Meinel
Some restructuring.
148
        0 if paths are equal,
2872.4.10 by Martin Pool
docstrings for cmp_ functions seem to be backwards
149
        and positive number if ``path2`` sorts first
2474.1.66 by John Arbash Meinel
Some restructuring.
150
    """
2474.1.70 by John Arbash Meinel
Lot's of fixes from Martin's comments.
151
    if not isinstance(path1, str):
152
        raise TypeError("'path1' must be a plain string, not %s: %r"
153
                        % (type(path1), path1))
154
    if not isinstance(path2, str):
155
        raise TypeError("'path2' must be a plain string, not %s: %r"
156
                        % (type(path2), path2))
2474.1.66 by John Arbash Meinel
Some restructuring.
157
    return cmp(path1.split('/'), path2.split('/'))
158
159
160
def _cmp_path_by_dirblock_py(path1, path2):
161
    """Compare two paths based on what directory they are in.
162
163
    This generates a sort order, such that all children of a directory are
164
    sorted together, and grandchildren are in the same order as the
165
    children appear. But all grandchildren come after all children.
166
167
    :param path1: first path
168
    :param path2: the second path
2872.4.10 by Martin Pool
docstrings for cmp_ functions seem to be backwards
169
    :return: negative number if ``path1`` comes first,
2474.1.66 by John Arbash Meinel
Some restructuring.
170
        0 if paths are equal
2872.4.10 by Martin Pool
docstrings for cmp_ functions seem to be backwards
171
        and a positive number if ``path2`` sorts first
2474.1.66 by John Arbash Meinel
Some restructuring.
172
    """
2474.1.70 by John Arbash Meinel
Lot's of fixes from Martin's comments.
173
    if not isinstance(path1, str):
174
        raise TypeError("'path1' must be a plain string, not %s: %r"
175
                        % (type(path1), path1))
176
    if not isinstance(path2, str):
177
        raise TypeError("'path2' must be a plain string, not %s: %r"
178
                        % (type(path2), path2))
2474.1.66 by John Arbash Meinel
Some restructuring.
179
    dirname1, basename1 = os.path.split(path1)
180
    key1 = (dirname1.split('/'), basename1)
181
    dirname2, basename2 = os.path.split(path2)
182
    key2 = (dirname2.split('/'), basename2)
183
    return cmp(key1, key2)
184
185
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
186
def _read_dirblocks_py(state):
187
    """Read in the dirblocks for the given DirState object.
188
189
    This is tightly bound to the DirState internal representation. It should be
190
    thought of as a member function, which is only separated out so that we can
191
    re-write it in pyrex.
192
193
    :param state: A DirState object.
194
    :return: None
195
    """
196
    state._state_file.seek(state._end_of_header)
197
    text = state._state_file.read()
198
    # TODO: check the crc checksums. crc_measured = zlib.crc32(text)
199
200
    fields = text.split('\0')
201
    # Remove the last blank entry
202
    trailing = fields.pop()
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
203
    if trailing != '':
3376.2.8 by Martin Pool
Some review cleanups for assertion removal
204
        raise AssertionError("dirstate line %r has trailing garbage: %r" 
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
205
            % (trailing,))
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
206
    # consider turning fields into a tuple.
207
208
    # skip the first field which is the trailing null from the header.
209
    cur = 1
210
    # Each line now has an extra '\n' field which is not used
211
    # so we just skip over it
212
    # entry size:
213
    #  3 fields for the key
214
    #  + number of fields per tree_data (5) * tree count
215
    #  + newline
216
    num_present_parents = state._num_present_parents()
217
    tree_count = 1 + num_present_parents
218
    entry_size = state._fields_per_entry()
219
    expected_field_count = entry_size * state._num_entries
220
    field_count = len(fields)
221
    # this checks our adjustment, and also catches file too short.
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
222
    if field_count - cur != expected_field_count:
223
        raise AssertionError(
224
            'field count incorrect %s != %s, entry_size=%s, '\
225
            'num_entries=%s fields=%r' % (
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
226
            field_count - cur, expected_field_count, entry_size,
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
227
            state._num_entries, fields))
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
228
229
    if num_present_parents == 1:
230
        # Bind external functions to local names
231
        _int = int
232
        # We access all fields in order, so we can just iterate over
233
        # them. Grab an straight iterator over the fields. (We use an
234
        # iterator because we don't want to do a lot of additions, nor
235
        # do we want to do a lot of slicing)
236
        next = iter(fields).next
237
        # Move the iterator to the current position
238
        for x in xrange(cur):
239
            next()
240
        # The two blocks here are deliberate: the root block and the
241
        # contents-of-root block.
242
        state._dirblocks = [('', []), ('', [])]
243
        current_block = state._dirblocks[0][1]
244
        current_dirname = ''
245
        append_entry = current_block.append
246
        for count in xrange(state._num_entries):
247
            dirname = next()
248
            name = next()
249
            file_id = next()
250
            if dirname != current_dirname:
251
                # new block - different dirname
252
                current_block = []
253
                current_dirname = dirname
254
                state._dirblocks.append((current_dirname, current_block))
255
                append_entry = current_block.append
256
            # we know current_dirname == dirname, so re-use it to avoid
257
            # creating new strings
258
            entry = ((current_dirname, name, file_id),
259
                     [(# Current Tree
260
                         next(),                # minikind
261
                         next(),                # fingerprint
262
                         _int(next()),          # size
263
                         next() == 'y',         # executable
264
                         next(),                # packed_stat or revision_id
265
                     ),
266
                     ( # Parent 1
267
                         next(),                # minikind
268
                         next(),                # fingerprint
269
                         _int(next()),          # size
270
                         next() == 'y',         # executable
271
                         next(),                # packed_stat or revision_id
272
                     ),
273
                     ])
274
            trailing = next()
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
275
            if trailing != '\n':
276
                raise ValueError("trailing garbage in dirstate: %r" % trailing)
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
277
            # append the entry to the current block
278
            append_entry(entry)
279
        state._split_root_dirblock_into_contents()
280
    else:
281
        fields_to_entry = state._get_fields_to_entry()
282
        entries = [fields_to_entry(fields[pos:pos+entry_size])
283
                   for pos in xrange(cur, field_count, entry_size)]
284
        state._entries_to_current_state(entries)
285
    # To convert from format 2  => format 3
286
    # state._dirblocks = sorted(state._dirblocks,
287
    #                          key=lambda blk:blk[0].split('/'))
288
    # To convert from format 3 => format 2
289
    # state._dirblocks = sorted(state._dirblocks)
2474.1.65 by John Arbash Meinel
Found an import dependency bug if the compiled version is not available.
290
    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
291