~bzr-pqm/bzr/bzr.dev

3640.2.4 by John Arbash Meinel
Copyright updates
1
# Copyright (C) 2007, 2008 Canonical Ltd
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
16
6379.6.7 by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear.
17
"""Python implementations of Dirstate Helper functions."""
18
6379.6.1 by Jelmer Vernooij
Import absolute_import in a few places.
19
from __future__ import absolute_import
20
6169.1.1 by Martin Packman
Move pack_stat to python dirstate helpers and expose and use pyrex version
21
import binascii
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
22
import os
6169.1.1 by Martin Packman
Move pack_stat to python dirstate helpers and expose and use pyrex version
23
import struct
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
24
2474.1.65 by John Arbash Meinel
Found an import dependency bug if the compiled version is not available.
25
# We cannot import the dirstate module, because it loads this module
26
# All we really need is the IN_MEMORY_MODIFIED constant
3640.2.5 by John Arbash Meinel
Change from using AssertionError to using DirstateCorrupt in a few places
27
from bzrlib import errors
2474.1.65 by John Arbash Meinel
Found an import dependency bug if the compiled version is not available.
28
from bzrlib.dirstate import DirState
2474.1.63 by John Arbash Meinel
Found a small bug in the python version of _read_dirblocks.
29
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
30
6169.1.3 by Martin Packman
Abbreviate pack_stat struct format to '>6L'
31
def pack_stat(st, _b64=binascii.b2a_base64, _pack=struct.Struct('>6L').pack):
6169.1.1 by Martin Packman
Move pack_stat to python dirstate helpers and expose and use pyrex version
32
    """Convert stat values into a packed representation
33
34
    Not all of the fields from the stat included are strictly needed, and by
35
    just encoding the mtime and mode a slight speed increase could be gained.
36
    However, using the pyrex version instead is a bigger win.
37
    """
38
    # base64 encoding always adds a final newline, so strip it off
6169.2.2 by Martin Packman
Merge fix for overflow issues in pack_stat from 2.4
39
    return _b64(_pack(st.st_size & 0xFFFFFFFF, int(st.st_mtime) & 0xFFFFFFFF,
40
        int(st.st_ctime) & 0xFFFFFFFF, st.st_dev & 0xFFFFFFFF,
41
        st.st_ino & 0xFFFFFFFF, st.st_mode))[:-1]
6169.1.1 by Martin Packman
Move pack_stat to python dirstate helpers and expose and use pyrex version
42
43
44
def _unpack_stat(packed_stat):
45
    """Turn a packed_stat back into the stat fields.
46
47
    This is meant as a debugging tool, should not be used in real code.
48
    """
49
    (st_size, st_mtime, st_ctime, st_dev, st_ino,
6169.1.3 by Martin Packman
Abbreviate pack_stat struct format to '>6L'
50
     st_mode) = struct.unpack('>6L', binascii.a2b_base64(packed_stat))
6169.1.1 by Martin Packman
Move pack_stat to python dirstate helpers and expose and use pyrex version
51
    return dict(st_size=st_size, st_mtime=st_mtime, st_ctime=st_ctime,
52
                st_dev=st_dev, st_ino=st_ino, st_mode=st_mode)
53
54
4459.2.2 by Vincent Ladeuil
Use the same method or function names for _dirstate_helpers in pyrex and
55
def _bisect_path_left(paths, path):
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
56
    """Return the index where to insert path into paths.
57
58
    This uses the dirblock sorting. So all children in a directory come before
59
    the children of children. For example::
60
61
        a/
62
          b/
63
            c
64
          d/
65
            e
66
          b-c
67
          d-e
68
        a-a
69
        a=c
70
71
    Will be sorted as::
72
73
        a
74
        a-a
75
        a=c
76
        a/b
77
        a/b-c
78
        a/d
79
        a/d-e
80
        a/b/c
81
        a/d/e
82
83
    :param paths: A list of paths to search through
84
    :param path: A single path to insert
85
    :return: An offset where 'path' can be inserted.
86
    :seealso: bisect.bisect_left
87
    """
88
    hi = len(paths)
89
    lo = 0
90
    while lo < hi:
91
        mid = (lo + hi) // 2
92
        # Grab the dirname for the current dirblock
93
        cur = paths[mid]
4459.2.2 by Vincent Ladeuil
Use the same method or function names for _dirstate_helpers in pyrex and
94
        if _cmp_path_by_dirblock(cur, path) < 0:
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
95
            lo = mid + 1
96
        else:
97
            hi = mid
98
    return lo
99
100
4459.2.2 by Vincent Ladeuil
Use the same method or function names for _dirstate_helpers in pyrex and
101
def _bisect_path_right(paths, path):
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
102
    """Return the index where to insert path into paths.
103
104
    This uses a path-wise comparison so we get::
105
        a
106
        a-b
107
        a=b
108
        a/b
109
    Rather than::
110
        a
111
        a-b
112
        a/b
113
        a=b
114
    :param paths: A list of paths to search through
115
    :param path: A single path to insert
116
    :return: An offset where 'path' can be inserted.
117
    :seealso: bisect.bisect_right
118
    """
119
    hi = len(paths)
120
    lo = 0
121
    while lo < hi:
122
        mid = (lo+hi)//2
123
        # Grab the dirname for the current dirblock
124
        cur = paths[mid]
4459.2.2 by Vincent Ladeuil
Use the same method or function names for _dirstate_helpers in pyrex and
125
        if _cmp_path_by_dirblock(path, cur) < 0:
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
126
            hi = mid
127
        else:
128
            lo = mid + 1
129
    return lo
130
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
131
4459.2.2 by Vincent Ladeuil
Use the same method or function names for _dirstate_helpers in pyrex and
132
def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache={}):
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
133
    """Return the index where to insert dirname into the dirblocks.
134
135
    The return value idx is such that all directories blocks in dirblock[:idx]
136
    have names < dirname, and all blocks in dirblock[idx:] have names >=
137
    dirname.
138
139
    Optional args lo (default 0) and hi (default len(dirblocks)) bound the
140
    slice of a to be searched.
141
    """
142
    if hi is None:
143
        hi = len(dirblocks)
144
    try:
145
        dirname_split = cache[dirname]
146
    except KeyError:
147
        dirname_split = dirname.split('/')
148
        cache[dirname] = dirname_split
149
    while lo < hi:
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
150
        mid = (lo + hi) // 2
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
151
        # Grab the dirname for the current dirblock
152
        cur = dirblocks[mid][0]
153
        try:
154
            cur_split = cache[cur]
155
        except KeyError:
156
            cur_split = cur.split('/')
157
            cache[cur] = cur_split
2474.1.58 by John Arbash Meinel
(broken) Try to properly implement DirState._bisect*
158
        if cur_split < dirname_split: lo = mid + 1
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
159
        else: hi = mid
160
    return lo
161
162
4459.2.2 by Vincent Ladeuil
Use the same method or function names for _dirstate_helpers in pyrex and
163
def cmp_by_dirs(path1, path2):
2474.1.66 by John Arbash Meinel
Some restructuring.
164
    """Compare two paths directory by directory.
165
166
    This is equivalent to doing::
167
168
       cmp(path1.split('/'), path2.split('/'))
169
170
    The idea is that you should compare path components separately. This
171
    differs from plain ``cmp(path1, path2)`` for paths like ``'a-b'`` and
172
    ``a/b``. "a-b" comes after "a" but would come before "a/b" lexically.
173
174
    :param path1: first path
175
    :param path2: second path
2872.4.10 by Martin Pool
docstrings for cmp_ functions seem to be backwards
176
    :return: negative number if ``path1`` comes first,
2474.1.66 by John Arbash Meinel
Some restructuring.
177
        0 if paths are equal,
2872.4.10 by Martin Pool
docstrings for cmp_ functions seem to be backwards
178
        and positive number if ``path2`` sorts first
2474.1.66 by John Arbash Meinel
Some restructuring.
179
    """
2474.1.70 by John Arbash Meinel
Lot's of fixes from Martin's comments.
180
    if not isinstance(path1, str):
181
        raise TypeError("'path1' must be a plain string, not %s: %r"
182
                        % (type(path1), path1))
183
    if not isinstance(path2, str):
184
        raise TypeError("'path2' must be a plain string, not %s: %r"
185
                        % (type(path2), path2))
2474.1.66 by John Arbash Meinel
Some restructuring.
186
    return cmp(path1.split('/'), path2.split('/'))
187
188
4459.2.2 by Vincent Ladeuil
Use the same method or function names for _dirstate_helpers in pyrex and
189
def _cmp_path_by_dirblock(path1, path2):
2474.1.66 by John Arbash Meinel
Some restructuring.
190
    """Compare two paths based on what directory they are in.
191
192
    This generates a sort order, such that all children of a directory are
193
    sorted together, and grandchildren are in the same order as the
194
    children appear. But all grandchildren come after all children.
195
196
    :param path1: first path
197
    :param path2: the second path
2872.4.10 by Martin Pool
docstrings for cmp_ functions seem to be backwards
198
    :return: negative number if ``path1`` comes first,
2474.1.66 by John Arbash Meinel
Some restructuring.
199
        0 if paths are equal
2872.4.10 by Martin Pool
docstrings for cmp_ functions seem to be backwards
200
        and a positive number if ``path2`` sorts first
2474.1.66 by John Arbash Meinel
Some restructuring.
201
    """
2474.1.70 by John Arbash Meinel
Lot's of fixes from Martin's comments.
202
    if not isinstance(path1, str):
203
        raise TypeError("'path1' must be a plain string, not %s: %r"
204
                        % (type(path1), path1))
205
    if not isinstance(path2, str):
206
        raise TypeError("'path2' must be a plain string, not %s: %r"
207
                        % (type(path2), path2))
2474.1.66 by John Arbash Meinel
Some restructuring.
208
    dirname1, basename1 = os.path.split(path1)
209
    key1 = (dirname1.split('/'), basename1)
210
    dirname2, basename2 = os.path.split(path2)
211
    key2 = (dirname2.split('/'), basename2)
212
    return cmp(key1, key2)
213
214
4459.2.2 by Vincent Ladeuil
Use the same method or function names for _dirstate_helpers in pyrex and
215
def _read_dirblocks(state):
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
216
    """Read in the dirblocks for the given DirState object.
217
218
    This is tightly bound to the DirState internal representation. It should be
219
    thought of as a member function, which is only separated out so that we can
220
    re-write it in pyrex.
221
222
    :param state: A DirState object.
223
    :return: None
224
    """
225
    state._state_file.seek(state._end_of_header)
226
    text = state._state_file.read()
227
    # TODO: check the crc checksums. crc_measured = zlib.crc32(text)
228
229
    fields = text.split('\0')
230
    # Remove the last blank entry
231
    trailing = fields.pop()
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
232
    if trailing != '':
3640.2.5 by John Arbash Meinel
Change from using AssertionError to using DirstateCorrupt in a few places
233
        raise errors.DirstateCorrupt(state,
234
            'trailing garbage: %r' % (trailing,))
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
235
    # consider turning fields into a tuple.
236
237
    # skip the first field which is the trailing null from the header.
238
    cur = 1
239
    # Each line now has an extra '\n' field which is not used
240
    # so we just skip over it
241
    # entry size:
242
    #  3 fields for the key
243
    #  + number of fields per tree_data (5) * tree count
244
    #  + newline
245
    num_present_parents = state._num_present_parents()
246
    tree_count = 1 + num_present_parents
247
    entry_size = state._fields_per_entry()
248
    expected_field_count = entry_size * state._num_entries
249
    field_count = len(fields)
250
    # this checks our adjustment, and also catches file too short.
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
251
    if field_count - cur != expected_field_count:
3640.2.5 by John Arbash Meinel
Change from using AssertionError to using DirstateCorrupt in a few places
252
        raise errors.DirstateCorrupt(state,
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
253
            'field count incorrect %s != %s, entry_size=%s, '\
254
            'num_entries=%s fields=%r' % (
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
255
            field_count - cur, expected_field_count, entry_size,
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
256
            state._num_entries, fields))
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
257
258
    if num_present_parents == 1:
259
        # Bind external functions to local names
260
        _int = int
261
        # We access all fields in order, so we can just iterate over
262
        # them. Grab an straight iterator over the fields. (We use an
263
        # iterator because we don't want to do a lot of additions, nor
264
        # do we want to do a lot of slicing)
265
        next = iter(fields).next
266
        # Move the iterator to the current position
267
        for x in xrange(cur):
268
            next()
269
        # The two blocks here are deliberate: the root block and the
270
        # contents-of-root block.
271
        state._dirblocks = [('', []), ('', [])]
272
        current_block = state._dirblocks[0][1]
273
        current_dirname = ''
274
        append_entry = current_block.append
275
        for count in xrange(state._num_entries):
276
            dirname = next()
277
            name = next()
278
            file_id = next()
279
            if dirname != current_dirname:
280
                # new block - different dirname
281
                current_block = []
282
                current_dirname = dirname
283
                state._dirblocks.append((current_dirname, current_block))
284
                append_entry = current_block.append
285
            # we know current_dirname == dirname, so re-use it to avoid
286
            # creating new strings
287
            entry = ((current_dirname, name, file_id),
288
                     [(# Current Tree
289
                         next(),                # minikind
290
                         next(),                # fingerprint
291
                         _int(next()),          # size
292
                         next() == 'y',         # executable
293
                         next(),                # packed_stat or revision_id
294
                     ),
295
                     ( # Parent 1
296
                         next(),                # minikind
297
                         next(),                # fingerprint
298
                         _int(next()),          # size
299
                         next() == 'y',         # executable
300
                         next(),                # packed_stat or revision_id
301
                     ),
302
                     ])
303
            trailing = next()
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
304
            if trailing != '\n':
305
                raise ValueError("trailing garbage in dirstate: %r" % trailing)
2474.1.57 by John Arbash Meinel
Move code around to refactor according to our pyrex extension design.
306
            # append the entry to the current block
307
            append_entry(entry)
308
        state._split_root_dirblock_into_contents()
309
    else:
310
        fields_to_entry = state._get_fields_to_entry()
311
        entries = [fields_to_entry(fields[pos:pos+entry_size])
312
                   for pos in xrange(cur, field_count, entry_size)]
313
        state._entries_to_current_state(entries)
314
    # To convert from format 2  => format 3
315
    # state._dirblocks = sorted(state._dirblocks,
316
    #                          key=lambda blk:blk[0].split('/'))
317
    # To convert from format 3 => format 2
318
    # state._dirblocks = sorted(state._dirblocks)
2474.1.65 by John Arbash Meinel
Found an import dependency bug if the compiled version is not available.
319
    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED