~bzr-pqm/bzr/bzr.dev

3640.2.4 by John Arbash Meinel
Copyright updates
1
# Copyright (C) 2007, 2008 Canonical Ltd
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
16
17
"""Python implementations of Dirstate Helper functions."""
18
6169.1.1 by Martin Packman
Move pack_stat to python dirstate helpers and expose and use pyrex version
19
import binascii
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
20
import os
6169.1.1 by Martin Packman
Move pack_stat to python dirstate helpers and expose and use pyrex version
21
import struct
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
22
2474.1.65 by John Arbash Meinel
Found an import dependency bug if the compiled version is not available.
23
# We cannot import the dirstate module, because it loads this module
24
# All we really need is the IN_MEMORY_MODIFIED constant
3640.2.5 by John Arbash Meinel
Change from using AssertionError to using DirstateCorrupt in a few places
25
from bzrlib import errors
2474.1.65 by John Arbash Meinel
Found an import dependency bug if the compiled version is not available.
26
from bzrlib.dirstate import DirState
2474.1.63 by John Arbash Meinel
Found a small bug in the python version of _read_dirblocks.
27
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
28
6169.1.3 by Martin Packman
Abbreviate pack_stat struct format to '>6L'
29
def pack_stat(st, _b64=binascii.b2a_base64, _pack=struct.Struct('>6L').pack):
6169.1.1 by Martin Packman
Move pack_stat to python dirstate helpers and expose and use pyrex version
30
    """Convert stat values into a packed representation
31
32
    Not all of the fields from the stat included are strictly needed, and by
33
    just encoding the mtime and mode a slight speed increase could be gained.
34
    However, using the pyrex version instead is a bigger win.
35
    """
36
    # base64 encoding always adds a final newline, so strip it off
6169.2.2 by Martin Packman
Merge fix for overflow issues in pack_stat from 2.4
37
    return _b64(_pack(st.st_size & 0xFFFFFFFF, int(st.st_mtime) & 0xFFFFFFFF,
38
        int(st.st_ctime) & 0xFFFFFFFF, st.st_dev & 0xFFFFFFFF,
39
        st.st_ino & 0xFFFFFFFF, st.st_mode))[:-1]
6169.1.1 by Martin Packman
Move pack_stat to python dirstate helpers and expose and use pyrex version
40
41
42
def _unpack_stat(packed_stat):
43
    """Turn a packed_stat back into the stat fields.
44
45
    This is meant as a debugging tool, should not be used in real code.
46
    """
47
    (st_size, st_mtime, st_ctime, st_dev, st_ino,
6169.1.3 by Martin Packman
Abbreviate pack_stat struct format to '>6L'
48
     st_mode) = struct.unpack('>6L', binascii.a2b_base64(packed_stat))
6169.1.1 by Martin Packman
Move pack_stat to python dirstate helpers and expose and use pyrex version
49
    return dict(st_size=st_size, st_mtime=st_mtime, st_ctime=st_ctime,
50
                st_dev=st_dev, st_ino=st_ino, st_mode=st_mode)
51
52
4459.2.2 by Vincent Ladeuil
Use the same method or function names for _dirstate_helpers in pyrex and
53
def _bisect_path_left(paths, path):
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
54
    """Return the index where to insert path into paths.
55
56
    This uses the dirblock sorting. So all children in a directory come before
57
    the children of children. For example::
58
59
        a/
60
          b/
61
            c
62
          d/
63
            e
64
          b-c
65
          d-e
66
        a-a
67
        a=c
68
69
    Will be sorted as::
70
71
        a
72
        a-a
73
        a=c
74
        a/b
75
        a/b-c
76
        a/d
77
        a/d-e
78
        a/b/c
79
        a/d/e
80
81
    :param paths: A list of paths to search through
82
    :param path: A single path to insert
83
    :return: An offset where 'path' can be inserted.
84
    :seealso: bisect.bisect_left
85
    """
86
    hi = len(paths)
87
    lo = 0
88
    while lo < hi:
89
        mid = (lo + hi) // 2
90
        # Grab the dirname for the current dirblock
91
        cur = paths[mid]
4459.2.2 by Vincent Ladeuil
Use the same method or function names for _dirstate_helpers in pyrex and
92
        if _cmp_path_by_dirblock(cur, path) < 0:
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
93
            lo = mid + 1
94
        else:
95
            hi = mid
96
    return lo
97
98
4459.2.2 by Vincent Ladeuil
Use the same method or function names for _dirstate_helpers in pyrex and
99
def _bisect_path_right(paths, path):
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
100
    """Return the index where to insert path into paths.
101
102
    This uses a path-wise comparison so we get::
103
        a
104
        a-b
105
        a=b
106
        a/b
107
    Rather than::
108
        a
109
        a-b
110
        a/b
111
        a=b
112
    :param paths: A list of paths to search through
113
    :param path: A single path to insert
114
    :return: An offset where 'path' can be inserted.
115
    :seealso: bisect.bisect_right
116
    """
117
    hi = len(paths)
118
    lo = 0
119
    while lo < hi:
120
        mid = (lo+hi)//2
121
        # Grab the dirname for the current dirblock
122
        cur = paths[mid]
4459.2.2 by Vincent Ladeuil
Use the same method or function names for _dirstate_helpers in pyrex and
123
        if _cmp_path_by_dirblock(path, cur) < 0:
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
124
            hi = mid
125
        else:
126
            lo = mid + 1
127
    return lo
128
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
129
4459.2.2 by Vincent Ladeuil
Use the same method or function names for _dirstate_helpers in pyrex and
130
def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache={}):
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
131
    """Return the index where to insert dirname into the dirblocks.
132
133
    The return value idx is such that all directories blocks in dirblock[:idx]
134
    have names < dirname, and all blocks in dirblock[idx:] have names >=
135
    dirname.
136
137
    Optional args lo (default 0) and hi (default len(dirblocks)) bound the
138
    slice of a to be searched.
139
    """
140
    if hi is None:
141
        hi = len(dirblocks)
142
    try:
143
        dirname_split = cache[dirname]
144
    except KeyError:
145
        dirname_split = dirname.split('/')
146
        cache[dirname] = dirname_split
147
    while lo < hi:
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
148
        mid = (lo + hi) // 2
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
149
        # Grab the dirname for the current dirblock
150
        cur = dirblocks[mid][0]
151
        try:
152
            cur_split = cache[cur]
153
        except KeyError:
154
            cur_split = cur.split('/')
155
            cache[cur] = cur_split
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
156
        if cur_split < dirname_split: lo = mid + 1
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
157
        else: hi = mid
158
    return lo
159
160
4459.2.2 by Vincent Ladeuil
Use the same method or function names for _dirstate_helpers in pyrex and
161
def cmp_by_dirs(path1, path2):
2474.1.66 by John Arbash Meinel
Some restructuring.
162
    """Compare two paths directory by directory.
163
164
    This is equivalent to doing::
165
166
       cmp(path1.split('/'), path2.split('/'))
167
168
    The idea is that you should compare path components separately. This
169
    differs from plain ``cmp(path1, path2)`` for paths like ``'a-b'`` and
170
    ``a/b``. "a-b" comes after "a" but would come before "a/b" lexically.
171
172
    :param path1: first path
173
    :param path2: second path
2872.4.10 by Martin Pool
docstrings for cmp_ functions seem to be backwards
174
    :return: negative number if ``path1`` comes first,
2474.1.66 by John Arbash Meinel
Some restructuring.
175
        0 if paths are equal,
2872.4.10 by Martin Pool
docstrings for cmp_ functions seem to be backwards
176
        and positive number if ``path2`` sorts first
2474.1.66 by John Arbash Meinel
Some restructuring.
177
    """
2474.1.70 by John Arbash Meinel
Lot's of fixes from Martin's comments.
178
    if not isinstance(path1, str):
179
        raise TypeError("'path1' must be a plain string, not %s: %r"
180
                        % (type(path1), path1))
181
    if not isinstance(path2, str):
182
        raise TypeError("'path2' must be a plain string, not %s: %r"
183
                        % (type(path2), path2))
2474.1.66 by John Arbash Meinel
Some restructuring.
184
    return cmp(path1.split('/'), path2.split('/'))
185
186
4459.2.2 by Vincent Ladeuil
Use the same method or function names for _dirstate_helpers in pyrex and
187
def _cmp_path_by_dirblock(path1, path2):
2474.1.66 by John Arbash Meinel
Some restructuring.
188
    """Compare two paths based on what directory they are in.
189
190
    This generates a sort order, such that all children of a directory are
191
    sorted together, and grandchildren are in the same order as the
192
    children appear. But all grandchildren come after all children.
193
194
    :param path1: first path
195
    :param path2: the second path
2872.4.10 by Martin Pool
docstrings for cmp_ functions seem to be backwards
196
    :return: negative number if ``path1`` comes first,
2474.1.66 by John Arbash Meinel
Some restructuring.
197
        0 if paths are equal
2872.4.10 by Martin Pool
docstrings for cmp_ functions seem to be backwards
198
        and a positive number if ``path2`` sorts first
2474.1.66 by John Arbash Meinel
Some restructuring.
199
    """
2474.1.70 by John Arbash Meinel
Lot's of fixes from Martin's comments.
200
    if not isinstance(path1, str):
201
        raise TypeError("'path1' must be a plain string, not %s: %r"
202
                        % (type(path1), path1))
203
    if not isinstance(path2, str):
204
        raise TypeError("'path2' must be a plain string, not %s: %r"
205
                        % (type(path2), path2))
2474.1.66 by John Arbash Meinel
Some restructuring.
206
    dirname1, basename1 = os.path.split(path1)
207
    key1 = (dirname1.split('/'), basename1)
208
    dirname2, basename2 = os.path.split(path2)
209
    key2 = (dirname2.split('/'), basename2)
210
    return cmp(key1, key2)
211
212
4459.2.2 by Vincent Ladeuil
Use the same method or function names for _dirstate_helpers in pyrex and
213
def _read_dirblocks(state):
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
214
    """Read in the dirblocks for the given DirState object.
215
216
    This is tightly bound to the DirState internal representation. It should be
217
    thought of as a member function, which is only separated out so that we can
218
    re-write it in pyrex.
219
220
    :param state: A DirState object.
221
    :return: None
222
    """
223
    state._state_file.seek(state._end_of_header)
224
    text = state._state_file.read()
225
    # TODO: check the crc checksums. crc_measured = zlib.crc32(text)
226
227
    fields = text.split('\0')
228
    # Remove the last blank entry
229
    trailing = fields.pop()
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
230
    if trailing != '':
3640.2.5 by John Arbash Meinel
Change from using AssertionError to using DirstateCorrupt in a few places
231
        raise errors.DirstateCorrupt(state,
232
            'trailing garbage: %r' % (trailing,))
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
233
    # consider turning fields into a tuple.
234
235
    # skip the first field which is the trailing null from the header.
236
    cur = 1
237
    # Each line now has an extra '\n' field which is not used
238
    # so we just skip over it
239
    # entry size:
240
    #  3 fields for the key
241
    #  + number of fields per tree_data (5) * tree count
242
    #  + newline
243
    num_present_parents = state._num_present_parents()
244
    tree_count = 1 + num_present_parents
245
    entry_size = state._fields_per_entry()
246
    expected_field_count = entry_size * state._num_entries
247
    field_count = len(fields)
248
    # this checks our adjustment, and also catches file too short.
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
249
    if field_count - cur != expected_field_count:
3640.2.5 by John Arbash Meinel
Change from using AssertionError to using DirstateCorrupt in a few places
250
        raise errors.DirstateCorrupt(state,
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
251
            'field count incorrect %s != %s, entry_size=%s, '\
252
            'num_entries=%s fields=%r' % (
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
253
            field_count - cur, expected_field_count, entry_size,
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
254
            state._num_entries, fields))
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
255
256
    if num_present_parents == 1:
257
        # Bind external functions to local names
258
        _int = int
259
        # We access all fields in order, so we can just iterate over
260
        # them. Grab an straight iterator over the fields. (We use an
261
        # iterator because we don't want to do a lot of additions, nor
262
        # do we want to do a lot of slicing)
263
        next = iter(fields).next
264
        # Move the iterator to the current position
265
        for x in xrange(cur):
266
            next()
267
        # The two blocks here are deliberate: the root block and the
268
        # contents-of-root block.
269
        state._dirblocks = [('', []), ('', [])]
270
        current_block = state._dirblocks[0][1]
271
        current_dirname = ''
272
        append_entry = current_block.append
273
        for count in xrange(state._num_entries):
274
            dirname = next()
275
            name = next()
276
            file_id = next()
277
            if dirname != current_dirname:
278
                # new block - different dirname
279
                current_block = []
280
                current_dirname = dirname
281
                state._dirblocks.append((current_dirname, current_block))
282
                append_entry = current_block.append
283
            # we know current_dirname == dirname, so re-use it to avoid
284
            # creating new strings
285
            entry = ((current_dirname, name, file_id),
286
                     [(# Current Tree
287
                         next(),                # minikind
288
                         next(),                # fingerprint
289
                         _int(next()),          # size
290
                         next() == 'y',         # executable
291
                         next(),                # packed_stat or revision_id
292
                     ),
293
                     ( # Parent 1
294
                         next(),                # minikind
295
                         next(),                # fingerprint
296
                         _int(next()),          # size
297
                         next() == 'y',         # executable
298
                         next(),                # packed_stat or revision_id
299
                     ),
300
                     ])
301
            trailing = next()
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
302
            if trailing != '\n':
303
                raise ValueError("trailing garbage in dirstate: %r" % trailing)
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
304
            # append the entry to the current block
305
            append_entry(entry)
306
        state._split_root_dirblock_into_contents()
307
    else:
308
        fields_to_entry = state._get_fields_to_entry()
309
        entries = [fields_to_entry(fields[pos:pos+entry_size])
310
                   for pos in xrange(cur, field_count, entry_size)]
311
        state._entries_to_current_state(entries)
312
    # To convert from format 2  => format 3
313
    # state._dirblocks = sorted(state._dirblocks,
314
    #                          key=lambda blk:blk[0].split('/'))
315
    # To convert from format 3 => format 2
316
    # state._dirblocks = sorted(state._dirblocks)
2474.1.65 by John Arbash Meinel
Found an import dependency bug if the compiled version is not available.
317
    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED