~bzr-pqm/bzr/bzr.dev

3640.2.4 by John Arbash Meinel
Copyright updates
1
# Copyright (C) 2007, 2008 Canonical Ltd
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Python implementations of Dirstate Helper functions."""
18
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
19
import os
20
2474.1.65 by John Arbash Meinel
Found an import dependency bug if the compiled version is not available.
21
# We cannot import the dirstate module, because it loads this module
22
# All we really need is the IN_MEMORY_MODIFIED constant
3640.2.5 by John Arbash Meinel
Change from using AssertionError to using DirstateCorrupt in a few places
23
from bzrlib import errors
2474.1.65 by John Arbash Meinel
Found an import dependency bug if the compiled version is not available.
24
from bzrlib.dirstate import DirState
2474.1.63 by John Arbash Meinel
Found a small bug in the python version of _read_dirblocks.
25
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
26
2474.1.66 by John Arbash Meinel
Some restructuring.
27
def _bisect_path_left_py(paths, path):
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
28
    """Return the index where to insert path into paths.
29
30
    This uses the dirblock sorting. So all children in a directory come before
31
    the children of children. For example::
32
33
        a/
34
          b/
35
            c
36
          d/
37
            e
38
          b-c
39
          d-e
40
        a-a
41
        a=c
42
43
    Will be sorted as::
44
45
        a
46
        a-a
47
        a=c
48
        a/b
49
        a/b-c
50
        a/d
51
        a/d-e
52
        a/b/c
53
        a/d/e
54
55
    :param paths: A list of paths to search through
56
    :param path: A single path to insert
57
    :return: An offset where 'path' can be inserted.
58
    :seealso: bisect.bisect_left
59
    """
60
    hi = len(paths)
61
    lo = 0
62
    while lo < hi:
63
        mid = (lo + hi) // 2
64
        # Grab the dirname for the current dirblock
65
        cur = paths[mid]
2474.1.66 by John Arbash Meinel
Some restructuring.
66
        if _cmp_path_by_dirblock_py(cur, path) < 0:
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
67
            lo = mid + 1
68
        else:
69
            hi = mid
70
    return lo
71
72
2474.1.66 by John Arbash Meinel
Some restructuring.
73
def _bisect_path_right_py(paths, path):
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
74
    """Return the index where to insert path into paths.
75
76
    This uses a path-wise comparison so we get::
77
        a
78
        a-b
79
        a=b
80
        a/b
81
    Rather than::
82
        a
83
        a-b
84
        a/b
85
        a=b
86
    :param paths: A list of paths to search through
87
    :param path: A single path to insert
88
    :return: An offset where 'path' can be inserted.
89
    :seealso: bisect.bisect_right
90
    """
91
    hi = len(paths)
92
    lo = 0
93
    while lo < hi:
94
        mid = (lo+hi)//2
95
        # Grab the dirname for the current dirblock
96
        cur = paths[mid]
2474.1.66 by John Arbash Meinel
Some restructuring.
97
        if _cmp_path_by_dirblock_py(path, cur) < 0:
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
98
            hi = mid
99
        else:
100
            lo = mid + 1
101
    return lo
102
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
103
104
def bisect_dirblock_py(dirblocks, dirname, lo=0, hi=None, cache={}):
105
    """Return the index where to insert dirname into the dirblocks.
106
107
    The return value idx is such that all directories blocks in dirblock[:idx]
108
    have names < dirname, and all blocks in dirblock[idx:] have names >=
109
    dirname.
110
111
    Optional args lo (default 0) and hi (default len(dirblocks)) bound the
112
    slice of a to be searched.
113
    """
114
    if hi is None:
115
        hi = len(dirblocks)
116
    try:
117
        dirname_split = cache[dirname]
118
    except KeyError:
119
        dirname_split = dirname.split('/')
120
        cache[dirname] = dirname_split
121
    while lo < hi:
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
122
        mid = (lo + hi) // 2
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
123
        # Grab the dirname for the current dirblock
124
        cur = dirblocks[mid][0]
125
        try:
126
            cur_split = cache[cur]
127
        except KeyError:
128
            cur_split = cur.split('/')
129
            cache[cur] = cur_split
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
130
        if cur_split < dirname_split: lo = mid + 1
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
131
        else: hi = mid
132
    return lo
133
134
2474.1.66 by John Arbash Meinel
Some restructuring.
135
def cmp_by_dirs_py(path1, path2):
136
    """Compare two paths directory by directory.
137
138
    This is equivalent to doing::
139
140
       cmp(path1.split('/'), path2.split('/'))
141
142
    The idea is that you should compare path components separately. This
143
    differs from plain ``cmp(path1, path2)`` for paths like ``'a-b'`` and
144
    ``a/b``. "a-b" comes after "a" but would come before "a/b" lexically.
145
146
    :param path1: first path
147
    :param path2: second path
2872.4.10 by Martin Pool
docstrings for cmp_ functions seem to be backwards
148
    :return: negative number if ``path1`` comes first,
2474.1.66 by John Arbash Meinel
Some restructuring.
149
        0 if paths are equal,
2872.4.10 by Martin Pool
docstrings for cmp_ functions seem to be backwards
150
        and positive number if ``path2`` sorts first
2474.1.66 by John Arbash Meinel
Some restructuring.
151
    """
2474.1.70 by John Arbash Meinel
Lot's of fixes from Martin's comments.
152
    if not isinstance(path1, str):
153
        raise TypeError("'path1' must be a plain string, not %s: %r"
154
                        % (type(path1), path1))
155
    if not isinstance(path2, str):
156
        raise TypeError("'path2' must be a plain string, not %s: %r"
157
                        % (type(path2), path2))
2474.1.66 by John Arbash Meinel
Some restructuring.
158
    return cmp(path1.split('/'), path2.split('/'))
159
160
161
def _cmp_path_by_dirblock_py(path1, path2):
162
    """Compare two paths based on what directory they are in.
163
164
    This generates a sort order, such that all children of a directory are
165
    sorted together, and grandchildren are in the same order as the
166
    children appear. But all grandchildren come after all children.
167
168
    :param path1: first path
169
    :param path2: the second path
2872.4.10 by Martin Pool
docstrings for cmp_ functions seem to be backwards
170
    :return: negative number if ``path1`` comes first,
2474.1.66 by John Arbash Meinel
Some restructuring.
171
        0 if paths are equal
2872.4.10 by Martin Pool
docstrings for cmp_ functions seem to be backwards
172
        and a positive number if ``path2`` sorts first
2474.1.66 by John Arbash Meinel
Some restructuring.
173
    """
2474.1.70 by John Arbash Meinel
Lot's of fixes from Martin's comments.
174
    if not isinstance(path1, str):
175
        raise TypeError("'path1' must be a plain string, not %s: %r"
176
                        % (type(path1), path1))
177
    if not isinstance(path2, str):
178
        raise TypeError("'path2' must be a plain string, not %s: %r"
179
                        % (type(path2), path2))
2474.1.66 by John Arbash Meinel
Some restructuring.
180
    dirname1, basename1 = os.path.split(path1)
181
    key1 = (dirname1.split('/'), basename1)
182
    dirname2, basename2 = os.path.split(path2)
183
    key2 = (dirname2.split('/'), basename2)
184
    return cmp(key1, key2)
185
186
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
187
def _read_dirblocks_py(state):
188
    """Read in the dirblocks for the given DirState object.
189
190
    This is tightly bound to the DirState internal representation. It should be
191
    thought of as a member function, which is only separated out so that we can
192
    re-write it in pyrex.
193
194
    :param state: A DirState object.
195
    :return: None
196
    """
197
    state._state_file.seek(state._end_of_header)
198
    text = state._state_file.read()
199
    # TODO: check the crc checksums. crc_measured = zlib.crc32(text)
200
201
    fields = text.split('\0')
202
    # Remove the last blank entry
203
    trailing = fields.pop()
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
204
    if trailing != '':
3640.2.5 by John Arbash Meinel
Change from using AssertionError to using DirstateCorrupt in a few places
205
        raise errors.DirstateCorrupt(state,
206
            'trailing garbage: %r' % (trailing,))
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
207
    # consider turning fields into a tuple.
208
209
    # skip the first field which is the trailing null from the header.
210
    cur = 1
211
    # Each line now has an extra '\n' field which is not used
212
    # so we just skip over it
213
    # entry size:
214
    #  3 fields for the key
215
    #  + number of fields per tree_data (5) * tree count
216
    #  + newline
217
    num_present_parents = state._num_present_parents()
218
    tree_count = 1 + num_present_parents
219
    entry_size = state._fields_per_entry()
220
    expected_field_count = entry_size * state._num_entries
221
    field_count = len(fields)
222
    # this checks our adjustment, and also catches file too short.
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
223
    if field_count - cur != expected_field_count:
3640.2.5 by John Arbash Meinel
Change from using AssertionError to using DirstateCorrupt in a few places
224
        raise errors.DirstateCorrupt(state,
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
225
            'field count incorrect %s != %s, entry_size=%s, '\
226
            'num_entries=%s fields=%r' % (
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
227
            field_count - cur, expected_field_count, entry_size,
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
228
            state._num_entries, fields))
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
229
230
    if num_present_parents == 1:
231
        # Bind external functions to local names
232
        _int = int
233
        # We access all fields in order, so we can just iterate over
234
        # them. Grab an straight iterator over the fields. (We use an
235
        # iterator because we don't want to do a lot of additions, nor
236
        # do we want to do a lot of slicing)
237
        next = iter(fields).next
238
        # Move the iterator to the current position
239
        for x in xrange(cur):
240
            next()
241
        # The two blocks here are deliberate: the root block and the
242
        # contents-of-root block.
243
        state._dirblocks = [('', []), ('', [])]
244
        current_block = state._dirblocks[0][1]
245
        current_dirname = ''
246
        append_entry = current_block.append
247
        for count in xrange(state._num_entries):
248
            dirname = next()
249
            name = next()
250
            file_id = next()
251
            if dirname != current_dirname:
252
                # new block - different dirname
253
                current_block = []
254
                current_dirname = dirname
255
                state._dirblocks.append((current_dirname, current_block))
256
                append_entry = current_block.append
257
            # we know current_dirname == dirname, so re-use it to avoid
258
            # creating new strings
259
            entry = ((current_dirname, name, file_id),
260
                     [(# Current Tree
261
                         next(),                # minikind
262
                         next(),                # fingerprint
263
                         _int(next()),          # size
264
                         next() == 'y',         # executable
265
                         next(),                # packed_stat or revision_id
266
                     ),
267
                     ( # Parent 1
268
                         next(),                # minikind
269
                         next(),                # fingerprint
270
                         _int(next()),          # size
271
                         next() == 'y',         # executable
272
                         next(),                # packed_stat or revision_id
273
                     ),
274
                     ])
275
            trailing = next()
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
276
            if trailing != '\n':
277
                raise ValueError("trailing garbage in dirstate: %r" % trailing)
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
278
            # append the entry to the current block
279
            append_entry(entry)
280
        state._split_root_dirblock_into_contents()
281
    else:
282
        fields_to_entry = state._get_fields_to_entry()
283
        entries = [fields_to_entry(fields[pos:pos+entry_size])
284
                   for pos in xrange(cur, field_count, entry_size)]
285
        state._entries_to_current_state(entries)
286
    # To convert from format 2  => format 3
287
    # state._dirblocks = sorted(state._dirblocks,
288
    #                          key=lambda blk:blk[0].split('/'))
289
    # To convert from format 3 => format 2
290
    # state._dirblocks = sorted(state._dirblocks)
2474.1.65 by John Arbash Meinel
Found an import dependency bug if the compiled version is not available.
291
    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED