2474.1.1
by John Arbash Meinel
Create a Pyrex extension for reading the dirstate file. |
1 |
# Copyright (C) 2007 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
17 |
"""Helper functions for DirState.
|
|
18 |
||
19 |
This is the python implementation for DirState functions.
|
|
20 |
"""
|
|
21 |
||
22 |
from bzrlib.dirstate import DirState |
|
23 |
||
24 |
||
2474.1.7
by John Arbash Meinel
Add some tests for a helper function that lets us |
25 |
cdef extern from *: |
26 |
ctypedef int size_t |
|
27 |
||
2474.1.4
by John Arbash Meinel
Add benchmarks for dirstate.bisect_dirblocks, and implement bisect_dirblocks in pyrex. |
28 |
cdef extern from "Python.h": |
29 |
# GetItem returns a borrowed reference
|
|
30 |
void *PyDict_GetItem(object p, object key) |
|
31 |
int PyDict_SetItem(object p, object key, object val) except -1 |
|
32 |
object PyList_GetItem(object lst, int index) |
|
33 |
object PyTuple_GetItem(object tpl, int index) |
|
34 |
||
2474.1.7
by John Arbash Meinel
Add some tests for a helper function that lets us |
35 |
char* PyString_AsString(object p) |
36 |
int PyString_Size(object p) |
|
37 |
||
38 |
||
39 |
cdef extern from "string.h": |
|
40 |
int strncmp(char *s1, char *s2, size_t len) |
|
41 |
int strcmp(char *s1, char *s2) |
|
42 |
char *strchr(char *s1, char c) |
|
43 |
||
2474.1.4
by John Arbash Meinel
Add benchmarks for dirstate.bisect_dirblocks, and implement bisect_dirblocks in pyrex. |
44 |
|
45 |
cdef object _split_from_path(object cache, object path): |
|
46 |
"""get the dirblock tuple for a given path.
|
|
47 |
||
48 |
:param cache: A Dictionary mapping string paths to tuples
|
|
49 |
:param path: The path we care about.
|
|
50 |
:return: A borrowed reference to a tuple stored in cache.
|
|
51 |
You do not need to Py_DECREF() when you are done, unless you plan on
|
|
52 |
using it for a while.
|
|
53 |
"""
|
|
54 |
cdef void* value_ptr |
|
55 |
cdef object value |
|
56 |
||
57 |
value_ptr = PyDict_GetItem(cache, path) |
|
58 |
if value_ptr == NULL: |
|
59 |
value = path.split('/') |
|
60 |
PyDict_SetItem(cache, path, value) |
|
61 |
else: |
|
62 |
value = <object>value_ptr |
|
63 |
||
64 |
return value |
|
65 |
||
66 |
||
2474.1.5
by John Arbash Meinel
Implement explicit handling of the no-cache version, which is even faster. |
67 |
cdef int _bisect_dirblock_nocache(object dirblocks, object dirname, int _lo, int _hi): |
68 |
cdef int _mid |
|
69 |
cdef object cur |
|
70 |
cdef object cur_split |
|
71 |
cdef object dirname_split |
|
72 |
||
73 |
dirname_split = dirname.split('/') |
|
74 |
||
75 |
while _lo < _hi: |
|
76 |
_mid = (_lo+_hi)/2 |
|
77 |
# Grab the dirname for the current dirblock
|
|
78 |
cur = PyTuple_GetItem(PyList_GetItem(dirblocks, _mid), 0) |
|
79 |
cur_split = cur.split('/') |
|
80 |
if cur_split < dirname_split: _lo = _mid+1 |
|
81 |
else: _hi = _mid |
|
82 |
return _lo |
|
83 |
||
84 |
||
2474.1.7
by John Arbash Meinel
Add some tests for a helper function that lets us |
85 |
cdef int _cmp_dirblock_strings(char *path1, int size1, char *path2, int size2): |
86 |
"""This compares 2 strings separating on path sections.
|
|
87 |
||
88 |
This is equivalent to "cmp(path1.split('/'), path2.split('/'))"
|
|
89 |
However, we don't want to create an extra object for doing the split.
|
|
90 |
||
91 |
:param path1: The first path to compare
|
|
92 |
:param size1: The length of the first path
|
|
93 |
:param path2: The second path
|
|
94 |
:param size1: The length of the second path
|
|
95 |
:return: 0 if they are equal, -1 if path1 comes first, 1 if path2 comes
|
|
96 |
first
|
|
97 |
"""
|
|
98 |
cdef char *base1 |
|
99 |
cdef char *base2 |
|
100 |
cdef char *tip1 |
|
101 |
cdef char *tip2 |
|
102 |
cdef char *end1 |
|
103 |
cdef char *end2 |
|
104 |
cdef int cur_len1 |
|
105 |
cdef int cur_len2 |
|
106 |
cdef int cmp_len |
|
107 |
cdef int diff |
|
108 |
||
109 |
base1 = path1 |
|
110 |
base2 = path2 |
|
111 |
end1 = base1 + size1 |
|
112 |
end2 = base2 + size2 |
|
113 |
||
114 |
# Ensure that we are pointing to the final NULL terminator on both ends
|
|
115 |
assert end1[0] == c'\x00' |
|
116 |
assert end2[0] == c'\x00' |
|
117 |
||
118 |
while base1 < end1 and base2 < end2: |
|
119 |
# Find the next path separator
|
|
120 |
# (This is where you would like strchrnul)
|
|
121 |
tip1 = strchr(base1, c'/') |
|
122 |
tip2 = strchr(base2, c'/') |
|
123 |
||
124 |
if tip1 == NULL: |
|
125 |
tip1 = end1 |
|
126 |
if tip2 == NULL: |
|
127 |
tip2 = end2 |
|
128 |
||
129 |
cur_len1 = tip1 - base1 |
|
130 |
cur_len2 = tip2 - base2 |
|
131 |
cmp_len = cur_len1 |
|
132 |
if cur_len2 < cur_len1: |
|
133 |
cmp_len = cur_len2 |
|
134 |
||
135 |
diff = strncmp(base1, base2, cmp_len) |
|
136 |
# print 'comparing "%s", "%s", %d = %d' % (base1, base2, cmp_len, diff)
|
|
137 |
if diff != 0: |
|
138 |
return diff |
|
139 |
if cur_len1 < cur_len2: |
|
140 |
return -1 |
|
141 |
elif cur_len1 > cur_len2: |
|
142 |
return 1 |
|
143 |
base1 = tip1+1 |
|
144 |
base2 = tip2+1 |
|
145 |
# Do we still have uncompared characters?
|
|
146 |
if base1 < end1: |
|
147 |
return 1 |
|
148 |
if base2 < end2: |
|
149 |
return -1 |
|
150 |
return 0 |
|
151 |
||
152 |
||
153 |
def cmp_dirblock_strings(path1, path2): |
|
154 |
"""Compare to python strings in dirblock fashion."""
|
|
155 |
return _cmp_dirblock_strings(PyString_AsString(path1), |
|
156 |
PyString_Size(path1), |
|
157 |
PyString_AsString(path2), |
|
158 |
PyString_Size(path2)) |
|
159 |
||
160 |
||
2474.1.5
by John Arbash Meinel
Implement explicit handling of the no-cache version, which is even faster. |
161 |
def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache=None): |
2474.1.4
by John Arbash Meinel
Add benchmarks for dirstate.bisect_dirblocks, and implement bisect_dirblocks in pyrex. |
162 |
"""Return the index where to insert dirname into the dirblocks.
|
163 |
||
164 |
The return value idx is such that all directories blocks in dirblock[:idx]
|
|
165 |
have names < dirname, and all blocks in dirblock[idx:] have names >=
|
|
166 |
dirname.
|
|
167 |
||
168 |
Optional args lo (default 0) and hi (default len(dirblocks)) bound the
|
|
169 |
slice of a to be searched.
|
|
170 |
"""
|
|
171 |
cdef int _lo |
|
172 |
cdef int _hi |
|
173 |
cdef int _mid |
|
174 |
cdef object dirname_split |
|
175 |
cdef object cur_split |
|
176 |
||
177 |
if hi is None: |
|
178 |
_hi = len(dirblocks) |
|
179 |
else: |
|
180 |
_hi = hi |
|
181 |
||
182 |
_lo = lo |
|
2474.1.5
by John Arbash Meinel
Implement explicit handling of the no-cache version, which is even faster. |
183 |
if cache is None: |
184 |
return _bisect_dirblock_nocache(dirblocks, dirname, _lo, _hi) |
|
185 |
||
2474.1.4
by John Arbash Meinel
Add benchmarks for dirstate.bisect_dirblocks, and implement bisect_dirblocks in pyrex. |
186 |
dirname_split = _split_from_path(cache, dirname) |
187 |
while _lo < _hi: |
|
188 |
_mid = (_lo+_hi)/2 |
|
189 |
# Grab the dirname for the current dirblock
|
|
190 |
cur = PyTuple_GetItem(PyList_GetItem(dirblocks, _mid), 0) |
|
191 |
cur_split = _split_from_path(cache, cur) |
|
192 |
if cur_split < dirname_split: _lo = _mid+1 |
|
193 |
else: _hi = _mid |
|
194 |
return _lo |
|
195 |
||
196 |
||
2474.1.1
by John Arbash Meinel
Create a Pyrex extension for reading the dirstate file. |
197 |
def _read_dirblocks(state): |
198 |
"""Read in the dirblocks for the given DirState object.
|
|
199 |
||
200 |
This is tightly bound to the DirState internal representation. It should be
|
|
201 |
thought of as a member function, which is only separated out so that we can
|
|
202 |
re-write it in pyrex.
|
|
203 |
||
204 |
:param state: A DirState object.
|
|
205 |
:return: None
|
|
206 |
"""
|
|
207 |
cdef int pos |
|
208 |
cdef int entry_size |
|
209 |
cdef int field_count |
|
210 |
||
211 |
state._state_file.seek(state._end_of_header) |
|
212 |
text = state._state_file.read() |
|
213 |
# TODO: check the crc checksums. crc_measured = zlib.crc32(text)
|
|
214 |
||
215 |
fields = text.split('\0') |
|
216 |
# Remove the last blank entry
|
|
217 |
trailing = fields.pop() |
|
218 |
assert trailing == '' |
|
219 |
# consider turning fields into a tuple.
|
|
220 |
||
221 |
# skip the first field which is the trailing null from the header.
|
|
222 |
cur = 1 |
|
223 |
# Each line now has an extra '\n' field which is not used
|
|
224 |
# so we just skip over it
|
|
225 |
# entry size:
|
|
226 |
# 3 fields for the key
|
|
227 |
# + number of fields per tree_data (5) * tree count
|
|
228 |
# + newline
|
|
229 |
num_present_parents = state._num_present_parents() |
|
230 |
tree_count = 1 + num_present_parents |
|
231 |
entry_size = state._fields_per_entry() |
|
232 |
expected_field_count = entry_size * state._num_entries |
|
233 |
field_count = len(fields) |
|
234 |
# this checks our adjustment, and also catches file too short.
|
|
235 |
assert field_count - cur == expected_field_count, \ |
|
236 |
'field count incorrect %s != %s, entry_size=%s, '\ |
|
237 |
'num_entries=%s fields=%r' % ( |
|
238 |
field_count - cur, expected_field_count, entry_size, |
|
239 |
state._num_entries, fields) |
|
240 |
||
241 |
if num_present_parents == 1: |
|
242 |
# Bind external functions to local names
|
|
243 |
_int = int |
|
244 |
# We access all fields in order, so we can just iterate over
|
|
245 |
# them. Grab an straight iterator over the fields. (We use an
|
|
246 |
# iterator because we don't want to do a lot of additions, nor
|
|
247 |
# do we want to do a lot of slicing)
|
|
248 |
next = iter(fields).next |
|
249 |
# Move the iterator to the current position
|
|
250 |
for x in xrange(cur): |
|
251 |
next() |
|
252 |
# The two blocks here are deliberate: the root block and the
|
|
253 |
# contents-of-root block.
|
|
254 |
state._dirblocks = [('', []), ('', [])] |
|
255 |
current_block = state._dirblocks[0][1] |
|
256 |
current_dirname = '' |
|
257 |
append_entry = current_block.append |
|
258 |
for count in xrange(state._num_entries): |
|
259 |
dirname = next() |
|
260 |
name = next() |
|
261 |
file_id = next() |
|
262 |
if dirname != current_dirname: |
|
263 |
# new block - different dirname
|
|
264 |
current_block = [] |
|
265 |
current_dirname = dirname |
|
266 |
state._dirblocks.append((current_dirname, current_block)) |
|
267 |
append_entry = current_block.append |
|
268 |
# we know current_dirname == dirname, so re-use it to avoid
|
|
269 |
# creating new strings
|
|
270 |
entry = ((current_dirname, name, file_id), |
|
271 |
[(# Current Tree |
|
272 |
next(), # minikind |
|
273 |
next(), # fingerprint |
|
274 |
_int(next()), # size |
|
275 |
next() == 'y', # executable |
|
276 |
next(), # packed_stat or revision_id |
|
277 |
),
|
|
278 |
( # Parent 1 |
|
279 |
next(), # minikind |
|
280 |
next(), # fingerprint |
|
281 |
_int(next()), # size |
|
282 |
next() == 'y', # executable |
|
283 |
next(), # packed_stat or revision_id |
|
284 |
),
|
|
285 |
])
|
|
286 |
trailing = next() |
|
287 |
assert trailing == '\n' |
|
288 |
# append the entry to the current block
|
|
289 |
append_entry(entry) |
|
290 |
state._split_root_dirblock_into_contents() |
|
291 |
else: |
|
292 |
||
293 |
fields_to_entry = state._get_fields_to_entry() |
|
294 |
entries = [] |
|
295 |
entries_append = entries.append |
|
296 |
pos = cur |
|
297 |
entry_size = entry_size |
|
298 |
while pos < field_count: |
|
299 |
entries_append(fields_to_entry(fields[pos:pos+entry_size])) |
|
300 |
pos = pos + entry_size |
|
301 |
state._entries_to_current_state(entries) |
|
302 |
# To convert from format 2 => format 3
|
|
303 |
# state._dirblocks = sorted(state._dirblocks,
|
|
304 |
# key=lambda blk:blk[0].split('/'))
|
|
305 |
# To convert from format 3 => format 2
|
|
306 |
# state._dirblocks = sorted(state._dirblocks)
|
|
307 |
state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED |