1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
|
# Copyright (C) 2007, 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""Python implementations of Dirstate Helper functions."""
import binascii
import os
import struct
# We cannot import the dirstate module, because it loads this module.
# All we really need is the DirState.IN_MEMORY_UNMODIFIED constant.
from bzrlib import errors
from bzrlib.dirstate import DirState
def pack_stat(st, _b64=binascii.b2a_base64, _pack=struct.Struct('>6L').pack):
    """Convert stat values into a packed representation.

    The size, mtime, ctime, device, inode and mode of ``st`` are each reduced
    to 32 bits, packed as six big-endian unsigned longs and base64 encoded.

    Not all of the fields from the stat included are strictly needed, and by
    just encoding the mtime and mode a slight speed increase could be gained.
    However, using the pyrex version instead is a bigger win.

    :param st: A stat result, or any object with ``st_size``, ``st_mtime``,
        ``st_ctime``, ``st_dev``, ``st_ino`` and ``st_mode`` attributes.
    :param _b64: Pre-bound base64 encoder (``binascii.b2a_base64``).
    :param _pack: Pre-bound packer for the ``'>6L'`` struct format.
    :return: The base64 encoded fingerprint, without the trailing newline.
    """
    mask = 0xFFFFFFFF
    # Every field, st_mode included, is masked so that a value wider than
    # 32 bits cannot make the '>6L' packer raise struct.error.
    packed = _pack(st.st_size & mask, int(st.st_mtime) & mask,
                   int(st.st_ctime) & mask, st.st_dev & mask,
                   st.st_ino & mask, st.st_mode & mask)
    # base64 encoding always adds a final newline, so strip it off
    return _b64(packed)[:-1]
def _unpack_stat(packed_stat):
    """Turn a packed_stat back into the stat fields.

    This is meant as a debugging tool, should not be used in real code.

    :param packed_stat: Base64 text encoding six big-endian unsigned longs
        (the ``'>6L'`` struct format).
    :return: A dict with the integer values stored under the keys
        ``st_size``, ``st_mtime``, ``st_ctime``, ``st_dev``, ``st_ino`` and
        ``st_mode``.
    """
    raw = binascii.a2b_base64(packed_stat)
    (st_size, st_mtime, st_ctime, st_dev, st_ino,
     st_mode) = struct.unpack('>6L', raw)
    return dict(st_size=st_size, st_mtime=st_mtime, st_ctime=st_ctime,
                st_dev=st_dev, st_ino=st_ino, st_mode=st_mode)
def _bisect_path_left(paths, path):
    """Return the index where to insert path into paths.

    This uses the dirblock sorting. So all children in a directory come before
    the children of children. For example::

        a/
          b/
            c
          d/
            e
          b-c
          d-e
        a-a
        a=c

    Will be sorted as::

        a
        a-a
        a=c
        a/b
        a/b-c
        a/d
        a/d-e
        a/b/c
        a/d/e

    :param paths: A list of paths to search through
    :param path: A single path to insert
    :return: An offset where 'path' can be inserted.
    :seealso: bisect.bisect_left
    """
    hi = len(paths)
    lo = 0
    while lo < hi:
        mid = (lo + hi) // 2
        # Grab the path at the midpoint of the remaining search window
        cur = paths[mid]
        if _cmp_path_by_dirblock(cur, path) < 0:
            # cur sorts strictly before path: the slot is to the right
            lo = mid + 1
        else:
            # cur sorts at or after path: keep mid as a candidate
            hi = mid
    return lo
def _bisect_path_right(paths, path):
    """Return the index where to insert path into paths.

    This uses a path-wise comparison so we get::

        a
        a-b
        a=b
        a/b

    Rather than::

        a
        a-b
        a/b
        a=b

    :param paths: A list of paths to search through
    :param path: A single path to insert
    :return: An offset where 'path' can be inserted.
    :seealso: bisect.bisect_right
    """
    hi = len(paths)
    lo = 0
    while lo < hi:
        mid = (lo + hi) // 2
        # Grab the path at the midpoint of the remaining search window
        cur = paths[mid]
        if _cmp_path_by_dirblock(path, cur) < 0:
            # path sorts strictly before cur: the slot is at or left of mid
            hi = mid
        else:
            # path sorts at or after cur: insert to the right of equal items
            lo = mid + 1
    return lo
def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache={}):
    """Return the index where to insert dirname into the dirblocks.

    The return value idx is such that all directories blocks in dirblock[:idx]
    have names < dirname, and all blocks in dirblock[idx:] have names >=
    dirname.

    Names are compared as lists of '/'-separated components, not as plain
    strings.

    Optional args lo (default 0) and hi (default len(dirblocks)) bound the
    slice of a to be searched.

    :param dirblocks: A sequence whose items carry the directory name at
        index 0.
    :param dirname: The directory name to locate.
    :param lo: Lower bound (inclusive) of the slice to search.
    :param hi: Upper bound (exclusive) of the slice to search.
    :param cache: A dict mapping directory names to their split component
        lists. The default dict is deliberately shared between all calls
        that do not pass their own, so each name is only split once.
    :return: The insertion index.
    """
    if hi is None:
        hi = len(dirblocks)
    try:
        dirname_split = cache[dirname]
    except KeyError:
        dirname_split = dirname.split('/')
        cache[dirname] = dirname_split
    while lo < hi:
        mid = (lo + hi) // 2
        # Grab the dirname for the current dirblock
        cur = dirblocks[mid][0]
        try:
            cur_split = cache[cur]
        except KeyError:
            cur_split = cur.split('/')
            cache[cur] = cur_split
        if cur_split < dirname_split:
            lo = mid + 1
        else:
            hi = mid
    return lo
def cmp_by_dirs(path1, path2):
    """Compare two paths directory by directory.

    This is equivalent to doing (in Python 2)::

       cmp(path1.split('/'), path2.split('/'))

    The idea is that you should compare path components separately. This
    differs from plain ``cmp(path1, path2)`` for paths like ``'a-b'`` and
    ``a/b``. "a-b" comes after "a" but would come before "a/b" lexically.

    :param path1: first path
    :param path2: second path
    :return: negative number if ``path1`` comes first,
        0 if paths are equal,
        and positive number if ``path2`` sorts first
    :raises TypeError: if either path is not a ``str``.
    """
    if not isinstance(path1, str):
        raise TypeError("'path1' must be a plain string, not %s: %r"
                        % (type(path1), path1))
    if not isinstance(path2, str):
        raise TypeError("'path2' must be a plain string, not %s: %r"
                        % (type(path2), path2))
    parts1 = path1.split('/')
    parts2 = path2.split('/')
    # Spelled out instead of calling cmp(), which no longer exists in
    # Python 3; the result is still exactly -1, 0 or 1.
    if parts1 == parts2:
        return 0
    if parts1 < parts2:
        return -1
    return 1
def _cmp_path_by_dirblock(path1, path2):
    """Compare two paths based on what directory they are in.

    This generates a sort order, such that all children of a directory are
    sorted together, and grandchildren are in the same order as the
    children appear. But all grandchildren come after all children.

    Each path is reduced to the key ``(dirname.split('/'), basename)`` and
    the two keys are compared.

    :param path1: first path
    :param path2: the second path
    :return: negative number if ``path1`` comes first,
        0 if paths are equal
        and a positive number if ``path2`` sorts first
    :raises TypeError: if either path is not a ``str``.
    """
    if not isinstance(path1, str):
        raise TypeError("'path1' must be a plain string, not %s: %r"
                        % (type(path1), path1))
    if not isinstance(path2, str):
        raise TypeError("'path2' must be a plain string, not %s: %r"
                        % (type(path2), path2))
    dirname1, basename1 = os.path.split(path1)
    key1 = (dirname1.split('/'), basename1)
    dirname2, basename2 = os.path.split(path2)
    key2 = (dirname2.split('/'), basename2)
    # Spelled out instead of calling cmp(), which no longer exists in
    # Python 3; the result is still exactly -1, 0 or 1.
    if key1 == key2:
        return 0
    if key1 < key2:
        return -1
    return 1
def _read_dirblocks(state):
    """Read in the dirblocks for the given DirState object.

    This is tightly bound to the DirState internal representation. It should be
    thought of as a member function, which is only separated out so that we can
    re-write it in pyrex.

    Everything after the header is read from ``state._state_file`` and split
    on NUL. When there is exactly one present parent, the entries are built
    by an unrolled loop that fills ``state._dirblocks`` directly. Otherwise
    the fields are sliced per entry and handed to
    ``state._entries_to_current_state()``. In both cases
    ``state._dirblock_state`` is finally set to
    ``DirState.IN_MEMORY_UNMODIFIED``.

    :param state: A DirState object.
    :return: None
    :raises errors.DirstateCorrupt: if data follows the final NUL, or if the
        number of fields does not match ``state._num_entries`` entries.
    :raises ValueError: if, on the single-parent path, an entry is not
        terminated by a newline field.
    """
    state._state_file.seek(state._end_of_header)
    text = state._state_file.read()
    # TODO: check the crc checksums. crc_measured = zlib.crc32(text)

    fields = text.split('\0')
    # Remove the last blank entry
    trailing = fields.pop()
    if trailing != '':
        raise errors.DirstateCorrupt(state,
            'trailing garbage: %r' % (trailing,))
    # consider turning fields into a tuple.

    # skip the first field which is the trailing null from the header.
    cur = 1
    # Each line now has an extra '\n' field which is not used
    # so we just skip over it
    # entry size:
    #  3 fields for the key
    #  + number of fields per tree_data (5) * tree count
    #  + newline
    num_present_parents = state._num_present_parents()
    entry_size = state._fields_per_entry()
    expected_field_count = entry_size * state._num_entries
    field_count = len(fields)
    # this checks our adjustment, and also catches file too short.
    if field_count - cur != expected_field_count:
        raise errors.DirstateCorrupt(state,
            'field count incorrect %s != %s, entry_size=%s, '
            'num_entries=%s fields=%r' % (
                field_count - cur, expected_field_count, entry_size,
                state._num_entries, fields))

    if num_present_parents == 1:
        # Bind external functions to local names
        _int = int
        # We access all fields in order, so we can just iterate over
        # them. Grab a straight iterator over the fields. (We use an
        # iterator because we don't want to do a lot of additions, nor
        # do we want to do a lot of slicing)
        next_field = iter(fields).next
        # Move the iterator to the current position
        for _ in xrange(cur):
            next_field()
        # The two blocks here are deliberate: the root block and the
        # contents-of-root block.
        state._dirblocks = [('', []), ('', [])]
        current_block = state._dirblocks[0][1]
        current_dirname = ''
        append_entry = current_block.append
        for _ in xrange(state._num_entries):
            # NOTE: every next_field() call consumes one field, so the calls
            # below must stay in exactly this order.
            dirname = next_field()
            name = next_field()
            file_id = next_field()
            if dirname != current_dirname:
                # new block - different dirname
                current_block = []
                current_dirname = dirname
                state._dirblocks.append((current_dirname, current_block))
                append_entry = current_block.append
            # we know current_dirname == dirname, so re-use it to avoid
            # creating new strings
            entry = ((current_dirname, name, file_id),
                     [(  # Current Tree
                         next_field(),  # minikind
                         next_field(),  # fingerprint
                         _int(next_field()),  # size
                         next_field() == 'y',  # executable
                         next_field(),  # packed_stat or revision_id
                      ),
                      (  # Parent 1
                         next_field(),  # minikind
                         next_field(),  # fingerprint
                         _int(next_field()),  # size
                         next_field() == 'y',  # executable
                         next_field(),  # packed_stat or revision_id
                      ),
                      ])
            trailing = next_field()
            if trailing != '\n':
                raise ValueError("trailing garbage in dirstate: %r" % trailing)
            # append the entry to the current block
            append_entry(entry)
        state._split_root_dirblock_into_contents()
    else:
        fields_to_entry = state._get_fields_to_entry()
        entries = [fields_to_entry(fields[pos:pos+entry_size])
                   for pos in xrange(cur, field_count, entry_size)]
        state._entries_to_current_state(entries)
    # To convert from format 2 => format 3
    # state._dirblocks = sorted(state._dirblocks,
    #                          key=lambda blk:blk[0].split('/'))
    # To convert from format 3 => format 2
    # state._dirblocks = sorted(state._dirblocks)
    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
|