49
49
because we won't want to restat all files on disk just because a lock was
50
50
acquired, yet we cannot trust the data after the previous lock was released.
52
DirState State machines? Strategy needed ?
53
We have a number of key states:
54
A memory object exists, disk data untouched.
55
B have read just the parents details to answer common queries
56
C have read the entire dirstate, so can answer questions about the tree
58
D have altered some part of the directory data, can incrementally save.
59
i.e. have refreshed a stat hit for a single file.
60
E (full-dirty) have altered some part of the directory data, cannot
61
incrementally save. I.e. have added or deleted a file, or added or deleted
62
parents to the dirstate itself.
64
currently _header_read = True means the header is read, can be in B through E
65
_clean = True means the in memory representation is exactly what's
67
_clean = False -> A or B or D or E
68
_header_read = False means A
70
52
Memory representation:
71
Each row will be a tuple that has:
72
current_row_data_tuple, parent_list
73
current_row_data = [dirname, basename, fullkind, fileid, size, packed-stat, linkvalue]
74
parents_list = [(revision, kind, dirname, basename, size, executable_bool, sha1) ...]
75
row = (current_data, parents_list)
77
Still need to address how rows are managed:
79
vector of all rows or one vector per directory ?
80
vector of all rows allows trivial bisection to find paths
81
54
vector of all directories, and vector of the children ?
56
root_row = (direntry for root, [parent_direntries_for_root]),
85
58
('', ['data for achild', 'data for bchild', 'data for cchild'])
86
59
('dir', ['achild', 'cchild', 'echild'])
158
131
# of using int conversion rather than a dict here. AND BLAME ANDREW IF
135
IN_MEMORY_UNMODIFIED = 1
136
IN_MEMORY_MODIFIED = 2
161
138
def __init__(self):
162
self._header_read = False
139
# _header_state and _dirblock_state represent the current state
140
# of the dirstate metadata and the per-row data respectively.
141
# NOT_IN_MEMORY indicates that no data is in memory
142
# IN_MEMORY_UNMODIFIED indicates that what we have in memory
143
# is the same as is on disk
144
# IN_MEMORY_MODIFIED indicates that we have a modified version
145
# of what is on disk.
146
# In future we will add more granularity, for instance _dirblock_state
147
# will probably support partially-in-memory as a separate variable,
148
# allowing for partially-in-memory unmodified and partially-in-memory
150
self._header_state = DirState.NOT_IN_MEMORY
151
self._dirblock_state = DirState.NOT_IN_MEMORY
164
154
self._parents = []
155
self._state_file=None
166
157
def add_parent_tree(self, tree_id, tree):
167
158
"""Add tree as a parent to this dirstate."""
159
self._read_dirblocks_if_needed()
168
160
self._parents.append(tree_id)
161
self._header_state = DirState.IN_MEMORY_MODIFIED
163
self._ghosts.append(tree_id)
166
def from_tree(tree, dir_state_filename):
173
167
"""Create a dirstate from a bzr Tree.
175
169
:param tree: The tree which should provide parent information and
172
# XXX: aka the big ugly.
178
173
result = DirState()
174
result._state_file = open(dir_state_filename, 'wb+')
182
176
_encode = base64.encodestring
190
184
for parent_id in parent_ids:
191
185
parent_trees.append(tree.branch.repository.revision_tree(parent_id))
193
lines.append(result._get_parents_line(parent_ids))
194
187
# FIXME: is this utf8 safe?
196
189
to_minikind = DirState._kind_to_minikind
197
190
to_yesno = DirState._to_yesno
199
192
st = os.lstat(tree.basedir)
200
null_parent_info = '\0'.join((
208
#, 'd', gen_root_id().encode('utf8')
211
, 'd', tree.inventory.root.file_id.encode('utf8')
195
, 'directory', tree.inventory.root.file_id.encode('utf8')
196
, 0 # no point having a size for dirs.
215
] + [null_parent_info]*num_parents
216
# disabled because the root entry has no revision attribute set.
217
# for parent_tree in parent_trees:
218
# root_info.append('\0'.join((
219
# parent_tree.inventory.root.revision.encode('utf8'),
201
for parent_tree in parent_trees:
202
root_parents.append((
203
parent_tree.inventory.root.revision.encode('utf8'),
227
lines.append('\0'.join(root_info))
211
root_row = (root_info, root_parents)
229
213
for dirinfo, block in tree.walkdirs():
214
# dirinfo is path, id
216
# add the row for this block
218
dirblocks.append((dirinfo[0], block_row))
232
219
for relpath, name, kind, st, fileid, versionedkind in block:
233
220
if fileid is None:
234
221
# unversioned file, skip
264
251
row_data = (dirname.encode('utf8'), basename.encode('utf8'),
265
252
kind, fileid.encode('utf8'), st.st_size, pack_stat(st),
267
row_tuple = (row_data, parent_info)
268
lines.append(result._row_to_line(row_tuple))
254
block_row.append((row_data, parent_info))
270
256
# It isn't safe to remove entries while we are iterating
271
257
# over the same list, so remove them now
272
258
for entry in to_remove:
273
259
block.remove(entry)
275
result.lines = result._get_output_lines(lines)
276
result._header_read = True
261
#lines.append(result._get_parents_line(parent_ids))
262
#lines.append(result._get_ghosts_line([]))
263
result._set_data(parent_ids, root_row, dirblocks)
267
def get_ghosts(self):
268
"""Return a list of the parent tree revision ids that are ghosts."""
269
self._read_header_if_needed()
280
272
def get_lines(self):
281
273
"""Serialise the entire dirstate to a sequence of lines."""
274
if (self._header_state == DirState.IN_MEMORY_UNMODIFIED and
275
self._dirblock_state == DirState.IN_MEMORY_UNMODIFIED):
276
# read what's on disk.
277
self._state_file.seek(0)
278
return self._state_file.readlines()
285
280
lines.append(self._get_parents_line(self.get_parent_ids()))
281
lines.append(self._get_ghosts_line(self._ghosts))
286
282
# append the root line which is special cased
287
lines.append(self._row_to_line(self._root_row))
288
self.lines = self._get_output_lines(lines)
283
lines.extend(map(self._row_to_line, self._iter_rows()))
284
return self._get_output_lines(lines)
286
def _get_ghosts_line(self, ghost_ids):
287
"""Create a line for the state file for ghost information."""
288
return '\0'.join([str(len(ghost_ids))] + ghost_ids)
291
290
def _get_parents_line(self, parent_ids):
292
291
"""Create a line for the state file for parents information."""
293
292
return '\0'.join([str(len(parent_ids))] + parent_ids)
307
306
:param path: The name of the file for the dirstate.
308
307
:return: A DirState object.
310
# This constructs a new DirState object on a path, sets the state_file
309
# This constructs a new DirState object on a path, sets the _state_file
311
310
# to a new empty file for that path. It then calls _set_data() with our
312
311
# stock empty dirstate information - a root with ROOT_ID, no children,
313
312
# and no parents. Finally it calls save() to ensure that this data will
315
314
result = DirState()
316
result.state_file = open(path, 'wb+')
317
# a new root directory, with a pack_stat that is just noise and will
315
result._state_file = open(path, 'wb+')
316
# a new root directory, with a pack_stat (the x's) that is just noise and will
318
317
# never match the output of base64 encode.
319
root_row_data = ('', '', 'directory', bzrlib.inventory.ROOT_ID, 0, 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', '')
318
root_row_data = ('', '', 'directory', bzrlib.inventory.ROOT_ID, 0, 'x'*32, '')
320
319
root_parents = []
321
320
root_row = (root_row_data, root_parents)
322
empty_tree_data = [('', [])] # root dir contents - no entries.
323
result._set_data(root_row, empty_tree_data)
321
empty_tree_dirblocks = [('', [])] # root dir contents - no entries.
322
result._set_data([], root_row, empty_tree_dirblocks)
327
result.state_file.close()
326
result._state_file.close()
330
def _iter_rows(self):
331
"""Iterate over all the row data in the dirstate.
333
Each yielded item is a tuple of (row_data, parent_data_list).
335
self._read_dirblocks_if_needed()
337
for directory in self._dirblocks:
338
for row in directory[1]:
331
341
def _get_output_lines(self, lines):
332
342
"""format lines for final output.
338
348
lines.append('') # a final newline
339
349
inventory_text = '\0\n\0'.join(lines)
340
350
output_lines.append('adler32: %s\n' % (zlib.adler32(inventory_text),))
341
# -2, 1 for num parents, 1 for final newline
342
num_entries = len(lines)-2
351
# -3, 1 for num parents, 1 for ghosts, 1 for final newline
352
num_entries = len(lines)-3
343
353
output_lines.append('num_entries: %s\n' % (num_entries,))
344
354
output_lines.append(inventory_text)
345
355
return output_lines
348
358
def on_file(path):
349
359
"""Construct a DirState on the file at path path."""
350
360
result = DirState()
351
result.state_file = open(path, 'rb+')
361
result._state_file = open(path, 'rb+')
355
"""Read the entire state."""
364
def _read_dirblocks_if_needed(self):
365
"""Read in all the dirblocks from the file if they are not in memory."""
366
self._read_header_if_needed()
367
if self._dirblock_state == DirState.NOT_IN_MEMORY:
368
# the _state_file pointer will be positioned at the start of the
370
text = self._state_file.read()
371
# TODO: check the adler checksums. adler_measured = zlib.adler32(text)
373
fields = text.split('\0')
374
# Remove the last blank entry
375
trailing = fields.pop()
376
assert trailing == ''
377
# consider turning fields into a tuple.
379
# skip the first field which is the trailing null from the header.
381
field_count = len(fields)
382
# Each line now has an extra '\n' field which is not used
383
# so we just skip over it
384
# number of fields per dir_entry + number of fields per parent_entry + newline
385
num_parents = len(self._parents)
386
entry_size = 7 + (7 * num_parents) + 1
387
expected_field_count = entry_size * self._num_entries
388
# is the file too short ?
389
assert field_count - cur == expected_field_count, \
390
'field count incorrect %s != %s' % (expected_field_count, field_count)
392
# Fast path the case where there are 1 or 2 parents
394
entries = [(fields[pos:pos+7], []) for pos in xrange(cur, field_count, entry_size)]
395
elif num_parents == 1:
396
entries = [(fields[pos:pos+7], [fields[pos+7:pos+14],])
397
for pos in xrange(cur, field_count, entry_size)]
398
elif num_parents == 2:
399
entries = [(fields[pos:pos+7], [
400
fields[pos+7:pos+14],
401
fields[pos+14:pos+21],])
402
for pos in xrange(cur, field_count, entry_size)]
404
raise NotImplementedError(self._read_dirblocks_if_needed)
406
[fields[chunk:chunk+7] for chunk in xrange(pos, pos+entry_size-1, 7)])
407
for pos in xrange(cur, field_count, entry_size)
410
assert len(entries) == self._num_entries, '%s != %s entries' % (len(entries),
412
entry_iter = iter(entries)
413
self._root_row = entry_iter.next()
414
# convert the minikind to kind
415
self._root_row[0][2] = self._minikind_to_kind[self._root_row[0][2]]
416
# convert the size to an int
417
self._root_row[0][4] = int(self._root_row[0][4])
418
# TODO parent conversion
419
# TODO dirblock population
420
for entry in entry_iter:
358
424
def _read_header(self):
359
425
"""This reads in the metadata header, and the parent ids.
364
430
:return: (expected adler checksum, number of entries, parent list)
366
432
self._read_prelude()
367
parent_line = self.state_file.readline()
433
parent_line = self._state_file.readline()
368
434
info = parent_line.split('\0')
369
435
num_parents = int(info[0])
370
436
assert num_parents == len(info)-2, 'incorrect parent info line'
372
437
self._parents = [p.decode('utf8') for p in info[1:-1]]
439
ghost_line = self._state_file.readline()
440
info = ghost_line.split('\0')
441
num_ghosts = int(info[1])
442
assert num_ghosts == len(info)-3, 'incorrect ghost info line'
443
self._ghosts = [p.decode('utf8') for p in info[2:-1]]
444
self._header_state = DirState.IN_MEMORY_UNMODIFIED
374
446
def _read_header_if_needed(self):
375
447
"""Read the header of the dirstate file if needed."""
376
if self._header_read is False:
448
if self._header_state == DirState.NOT_IN_MEMORY:
377
449
self._read_header()
379
451
def _read_prelude(self):
385
457
The next entry in the file should be the number of parents,
386
458
and their ids. Followed by a newline.
388
header = self.state_file.readline()
460
header = self._state_file.readline()
389
461
assert header == '#bazaar dirstate flat format 1\n', \
390
462
'invalid header line: %r' % (header,)
391
adler_line = self.state_file.readline()
463
adler_line = self._state_file.readline()
392
464
assert adler_line.startswith('adler32: '), 'missing adler32 checksum'
393
465
self.adler_expected = int(adler_line[len('adler32: '):-1])
394
num_entries_line = self.state_file.readline()
466
num_entries_line = self._state_file.readline()
395
467
assert num_entries_line.startswith('num_entries: '), 'missing num_entries line'
396
self.num_entries = int(num_entries_line[len('num_entries: '):-1])
468
self._num_entries = int(num_entries_line[len('num_entries: '):-1])
398
470
def _row_to_line(self, row):
399
471
"""Serialize row to a NULL delimited line ready for _get_output_lines.
420
492
"""Save any pending changes created during this session."""
421
self.state_file.seek(0)
422
self.state_file.writelines(self.get_lines())
423
self.state_file.flush()
493
if (self._header_state == DirState.IN_MEMORY_MODIFIED or
494
self._dirblock_state == DirState.IN_MEMORY_MODIFIED):
495
self._state_file.seek(0)
496
self._state_file.writelines(self.get_lines())
497
self._state_file.flush()
498
self._header_state = DirState.IN_MEMORY_UNMODIFIED
499
self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
426
def _set_data(self, root_row, tree_data):
427
"""Set the full dirstate data to root_row and tree_data.
501
def _set_data(self, parent_ids, root_row, dirblocks):
502
"""Set the full dirstate data in memory.
429
504
This is an internal function used to completely replace the objects
430
505
in memory state. It puts the dirstate into state 'full-dirty'.
507
:param parent_ids: A list of parent tree revision ids.
508
:param root_row: The root row - a tuple of the root direntry and the
509
list of matching direntries from the parent_ids trees.
510
:param dirblocks: A list containing one tuple for each directory in the
511
tree. Each tuple contains the directory path and a list of
512
row data in the same format as root_row.
432
514
# our memory copy is now authoritative.
433
self._header_read = True
515
self._dirblocks = dirblocks
435
516
self._root_row = root_row
436
# should save tree_data.
517
self._header_state = DirState.IN_MEMORY_MODIFIED
518
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
519
self._parents = list(parent_ids)
521
def set_parent_trees(self, trees, ghosts):
522
"""Set the parent trees for the dirstate.
524
:param trees: A list of revision_id, tree tuples. tree must be provided
525
even if the revision_id refers to a ghost: supply an empty tree in
527
:param ghosts: A list of the revision_ids that are ghosts at the time
530
# TODO regenerate self._dirblocks and self._root_row
531
self._read_dirblocks_if_needed()
532
self._parents = [rev_id for rev_id, tree in trees]
533
self._ghosts = list(ghosts)
534
self._header_state = DirState.IN_MEMORY_MODIFIED
535
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
439
538
def pack_stat(st, _encode=base64.encodestring, _pack=struct.pack):