265
265
# return '%X.%X' % (int(st.st_mtime), st.st_mode)
268
def _unpack_stat(packed_stat):
    """Turn a packed_stat back into the stat fields.

    This is meant as a debugging tool, should not be used in real code.

    :param packed_stat: base64 text that decodes to six big-endian
        unsigned 32-bit integers.
    :return: a dict with the keys st_size, st_mtime, st_ctime, st_dev,
        st_ino and st_mode, in the order the integers were packed.
    """
    # Decode the base64 text first; struct.unpack then splits the raw
    # bytes into the six fixed-width fields.
    raw = binascii.a2b_base64(packed_stat)
    (st_size, st_mtime, st_ctime, st_dev, st_ino,
     st_mode) = struct.unpack('>LLLLLL', raw)
    return dict(st_size=st_size, st_mtime=st_mtime, st_ctime=st_ctime,
                st_dev=st_dev, st_ino=st_ino, st_mode=st_mode)
268
279
class SHA1Provider(object):
269
280
"""An interface for getting sha1s of a file."""
364
376
HEADER_FORMAT_2 = '#bazaar dirstate flat format 2\n'
365
377
HEADER_FORMAT_3 = '#bazaar dirstate flat format 3\n'
367
def __init__(self, path, sha1_provider):
379
def __init__(self, path, sha1_provider, worth_saving_limit=0):
368
380
"""Create a DirState object.
370
382
:param path: The path at which the dirstate file on disk should live.
371
383
:param sha1_provider: an object meeting the SHA1Provider interface.
384
:param worth_saving_limit: when the exact number of hash changed
385
entries is known, only bother saving the dirstate if more than
386
this count of entries have changed.
387
-1 means never save hash changes, 0 means always save hash changes.
373
389
# _header_state and _dirblock_state represent the current state
374
390
# of the dirstate metadata and the per-row data respectively.
412
428
self._last_block_index = None
413
429
self._last_entry_index = None
430
# The set of known hash changes
431
self._known_hash_changes = set()
432
# How many hash changed entries can we have without saving
433
self._worth_saving_limit = worth_saving_limit
415
435
def __repr__(self):
    """Return a debugging representation: ``ClassName(<repr of filename>)``."""
    class_name = self.__class__.__name__
    # The explicit one-element-per-placeholder tuple keeps %-formatting
    # safe even if _filename is itself a tuple.
    return "%s(%r)" % (class_name, self._filename)
439
def _mark_modified(self, hash_changed_entries=None, header_modified=False):
    """Mark this dirstate as modified.

    :param hash_changed_entries: if non-None (and non-empty), mark just
        these entries as having their hash modified.  The key of each
        entry (its first element) is recorded in _known_hash_changes.
    :param header_modified: mark the header modified as well, not just the
        dirblocks.
    """
    #trace.mutter_callsite(3, "modified hash entries: %s", hash_changed_entries)
    if hash_changed_entries:
        self._known_hash_changes.update([e[0] for e in hash_changed_entries])
        if self._dirblock_state in (DirState.NOT_IN_MEMORY,
                                    DirState.IN_MEMORY_UNMODIFIED):
            # If the dirstate is already marked as IN_MEMORY_MODIFIED, then
            # that takes precedence, so only upgrade from the clean states.
            self._dirblock_state = DirState.IN_MEMORY_HASH_MODIFIED
    else:
        # TODO: Since we now have a IN_MEMORY_HASH_MODIFIED state, we
        #       should fail noisily if someone tries to set
        #       IN_MEMORY_MODIFIED but we don't have a write-lock!
        # We don't know exactly what changed so disable smart saving
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
    if header_modified:
        self._header_state = DirState.IN_MEMORY_MODIFIED
464
def _mark_unmodified(self):
    """Mark this dirstate as unmodified.

    Resets both the header and dirblock state to IN_MEMORY_UNMODIFIED and
    forgets every recorded hash change.
    """
    self._header_state = DirState.IN_MEMORY_UNMODIFIED
    self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
    # A fresh set, so no previously recorded entry keys linger.
    self._known_hash_changes = set()
419
470
def add(self, path, file_id, kind, stat, fingerprint):
420
471
"""Add a path to be tracked.
547
598
if kind == 'directory':
548
599
# insert a new dirblock
549
600
self._ensure_block(block_index, entry_index, utf8path)
550
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
601
self._mark_modified()
551
602
if self._id_index:
552
603
self._add_to_id_index(self._id_index, entry_key)
1020
1071
self._ghosts = []
1021
1072
self._parents = [parents[0]]
1022
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1023
self._header_state = DirState.IN_MEMORY_MODIFIED
1073
self._mark_modified(header_modified=True)
1025
1075
def _empty_parent_info(self):
1026
1076
return [DirState.NULL_PARENT_DETAILS] * (len(self._parents) -
1556
1606
# the active tree.
1557
1607
raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
1559
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1560
self._header_state = DirState.IN_MEMORY_MODIFIED
1609
self._mark_modified(header_modified=True)
1561
1610
self._id_index = None
1734
1783
self._sha_cutoff_time()
1735
1784
if (stat_value.st_mtime < self._cutoff_time
1736
1785
and stat_value.st_ctime < self._cutoff_time):
1737
entry[1][0] = ('f', sha1, entry[1][0][2], entry[1][0][3],
1739
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1786
entry[1][0] = ('f', sha1, stat_value.st_size, entry[1][0][3],
1788
self._mark_modified([entry])
1741
1790
def _sha_cutoff_time(self):
1742
1791
"""Return cutoff time.
1800
1849
"""Serialise the entire dirstate to a sequence of lines."""
1801
1850
if (self._header_state == DirState.IN_MEMORY_UNMODIFIED and
1802
1851
self._dirblock_state == DirState.IN_MEMORY_UNMODIFIED):
1803
# read whats on disk.
1852
# read what's on disk.
1804
1853
self._state_file.seek(0)
1805
1854
return self._state_file.readlines()
1807
1856
lines.append(self._get_parents_line(self.get_parent_ids()))
1808
1857
lines.append(self._get_ghosts_line(self._ghosts))
1809
# append the root line which is special cased
1810
lines.extend(map(self._entry_to_line, self._iter_entries()))
1858
lines.extend(self._get_entry_lines())
1811
1859
return self._get_output_lines(lines)
1813
1861
def _get_ghosts_line(self, ghost_ids):
1818
1866
"""Create a line for the state file for parents information."""
1819
1867
return '\0'.join([str(len(parent_ids))] + parent_ids)
1869
def _get_entry_lines(self):
    """Create lines for entries.

    :return: the result of applying _entry_to_line to each entry produced
        by _iter_entries, in iteration order.
    """
    return map(self._entry_to_line, self._iter_entries())
1821
1873
def _get_fields_to_entry(self):
1822
1874
"""Get a function which converts entry fields into a entry record.
2089
2141
executable = False
2091
2143
raise Exception("can't pack %s" % inv_entry)
2092
return (minikind, fingerprint, size, executable, tree_data)
2144
return static_tuple.StaticTuple(minikind, fingerprint, size,
2145
executable, tree_data)
2094
2147
def _iter_child_entries(self, tree_index, path_utf8):
2095
2148
"""Iterate over all the entries that are children of path_utf.
2211
2264
"""The number of parent entries in each record row."""
2212
2265
return len(self._parents) - len(self._ghosts)
2215
def on_file(path, sha1_provider=None):
2268
def on_file(cls, path, sha1_provider=None, worth_saving_limit=0):
2216
2269
"""Construct a DirState on the file at path "path".
2218
2271
:param path: The path at which the dirstate file on disk should live.
2219
2272
:param sha1_provider: an object meeting the SHA1Provider interface.
2220
2273
If None, a DefaultSHA1Provider is used.
2274
:param worth_saving_limit: when the exact number of hash changed
2275
entries is known, only bother saving the dirstate if more than
2276
this count of entries have changed. -1 means never save.
2221
2277
:return: An unlocked DirState object, associated with the given path.
2223
2279
if sha1_provider is None:
2224
2280
sha1_provider = DefaultSHA1Provider()
2225
result = DirState(path, sha1_provider)
2281
result = cls(path, sha1_provider,
2282
worth_saving_limit=worth_saving_limit)
2228
2285
def _read_dirblocks_if_needed(self):
2320
2377
trace.mutter('Not saving DirState because '
2321
2378
'_changes_aborted is set.')
2323
if (self._header_state == DirState.IN_MEMORY_MODIFIED or
2324
self._dirblock_state == DirState.IN_MEMORY_MODIFIED):
2380
# TODO: Since we now distinguish IN_MEMORY_MODIFIED from
2381
# IN_MEMORY_HASH_MODIFIED, we should only fail quietly if we fail
2382
# to save an IN_MEMORY_HASH_MODIFIED, and fail *noisily* if we
2383
# fail to save IN_MEMORY_MODIFIED
2384
if self._worth_saving():
2326
2385
grabbed_write_lock = False
2327
2386
if self._lock_state != 'w':
2328
2387
grabbed_write_lock, new_lock = self._lock_token.temporary_write_lock()
2336
2395
# We couldn't grab a write lock, so we switch back to a read one
2398
lines = self.get_lines()
2339
2399
self._state_file.seek(0)
2340
self._state_file.writelines(self.get_lines())
2400
self._state_file.writelines(lines)
2341
2401
self._state_file.truncate()
2342
2402
self._state_file.flush()
2343
self._header_state = DirState.IN_MEMORY_UNMODIFIED
2344
self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
2403
self._mark_unmodified()
2346
2405
if grabbed_write_lock:
2347
2406
self._lock_token = self._lock_token.restore_read_lock()
2350
2409
# not changed contents. Since restore_read_lock may
2351
2410
# not be an atomic operation.
2412
def _worth_saving(self):
    """Is it worth saving the dirstate or not?

    :return: True when the header or dirblocks are IN_MEMORY_MODIFIED, or
        when only hashes changed and the number of known hash changes has
        reached _worth_saving_limit.  False otherwise, including whenever
        only hashes changed and the limit is -1.
    """
    if (self._header_state == DirState.IN_MEMORY_MODIFIED
            or self._dirblock_state == DirState.IN_MEMORY_MODIFIED):
        return True
    if self._dirblock_state == DirState.IN_MEMORY_HASH_MODIFIED:
        if self._worth_saving_limit == -1:
            # We never save hash changes when the limit is -1
            return False
        # If we're using smart saving and only a small number of
        # entries have changed their hash, don't bother saving. John has
        # suggested using a heuristic here based on the size of the
        # changed files and/or tree. For now, we go with a configurable
        # number of changes, keeping the calculation time
        # as low overhead as possible. (This also keeps all existing
        # tests passing as the default is 0, i.e. always save.)
        if len(self._known_hash_changes) >= self._worth_saving_limit:
            return True
    return False
2353
2432
def _set_data(self, parent_ids, dirblocks):
2354
2433
"""Set the full dirstate data in memory.
2364
2443
# our memory copy is now authoritative.
2365
2444
self._dirblocks = dirblocks
2366
self._header_state = DirState.IN_MEMORY_MODIFIED
2367
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2445
self._mark_modified(header_modified=True)
2368
2446
self._parents = list(parent_ids)
2369
2447
self._id_index = None
2370
2448
self._packed_stat_index = None
2390
2468
self._make_absent(entry)
2391
2469
self.update_minimal(('', '', new_id), 'd',
2392
2470
path_utf8='', packed_stat=entry[1][0][4])
2393
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2471
self._mark_modified()
2472
# XXX: This was added by Ian, we need to make sure there
2473
# are tests for it, because it isn't in bzr.dev TRUNK
2474
# It looks like the only place it is called is in setting the root
2475
# id of the tree. So probably we never had an _id_index when we
2476
# don't even have a root yet.
2477
if self._id_index is not None:
2478
self._add_to_id_index(self._id_index, entry[0])
2395
2480
def set_parent_trees(self, trees, ghosts):
2396
2481
"""Set the parent trees for the dirstate.
2441
2526
parent_trees = [tree for rev_id, tree in trees if rev_id not in ghosts]
2442
2527
# how many trees do we end up with
2443
2528
parent_count = len(parent_trees)
2529
st = static_tuple.StaticTuple
2445
2531
# one: the current tree
2446
2532
for entry in self._iter_entries():
2463
2549
# the suffix is from tree_index+1:parent_count+1.
2464
2550
new_location_suffix = [DirState.NULL_PARENT_DETAILS] * (parent_count - tree_index)
2465
2551
# now stitch in all the entries from this tree
2466
for path, entry in tree.inventory.iter_entries_by_dir():
2553
for path, entry in tree.iter_entries_by_dir():
2467
2554
# here we process each trees details for each item in the tree.
2468
2555
# we first update any existing entries for the id at other paths,
2469
2556
# then we either create or update the entry for the id at the
2476
2563
file_id = entry.file_id
2477
2564
path_utf8 = path.encode('utf8')
2478
2565
dirname, basename = osutils.split(path_utf8)
2479
new_entry_key = (dirname, basename, file_id)
2566
if dirname == last_dirname:
2567
# Try to re-use objects as much as possible
2568
dirname = last_dirname
2570
last_dirname = dirname
2571
new_entry_key = st(dirname, basename, file_id)
2480
2572
# tree index consistency: All other paths for this id in this tree
2481
2573
# index must point to the correct path.
2482
for entry_key in id_index.get(file_id, ()):
2574
entry_keys = id_index.get(file_id, ())
2575
for entry_key in entry_keys:
2483
2576
# TODO:PROFILING: It might be faster to just update
2484
2577
# rather than checking if we need to, and then overwrite
2485
2578
# the one we are located at.
2488
2581
# other trees, so put absent pointers there
2489
2582
# This is the vertical axis in the matrix, all pointing
2490
2583
# to the real path.
2491
by_path[entry_key][tree_index] = ('r', path_utf8, 0, False, '')
2492
# by path consistency: Insert into an existing path record (trivial), or
2493
# add a new one with relocation pointers for the other tree indexes.
2494
entry_keys = id_index.get(file_id, ())
2584
by_path[entry_key][tree_index] = st('r', path_utf8, 0,
2586
# by path consistency: Insert into an existing path record
2587
# (trivial), or add a new one with relocation pointers for the
2588
# other tree indexes.
2495
2589
if new_entry_key in entry_keys:
2496
# there is already an entry where this data belongs, just insert it.
2590
# there is already an entry where this data belongs, just
2497
2592
by_path[new_entry_key][tree_index] = \
2498
2593
self._inv_entry_to_details(entry)
2509
2604
new_details.append(DirState.NULL_PARENT_DETAILS)
2511
2606
# grab any one entry, use it to find the right path.
2512
# TODO: optimise this to reduce memory use in highly
2513
# fragmented situations by reusing the relocation
2515
2607
a_key = iter(entry_keys).next()
2516
2608
if by_path[a_key][lookup_index][0] in ('r', 'a'):
2517
# its a pointer or missing statement, use it as is.
2609
# it's a pointer or missing statement, use it as
2518
2611
new_details.append(by_path[a_key][lookup_index])
2520
2613
# we have the right key, make a pointer to it.
2521
2614
real_path = ('/'.join(a_key[0:2])).strip('/')
2522
new_details.append(('r', real_path, 0, False, ''))
2615
new_details.append(st('r', real_path, 0, False,
2523
2617
new_details.append(self._inv_entry_to_details(entry))
2524
2618
new_details.extend(new_location_suffix)
2525
2619
by_path[new_entry_key] = new_details
2531
2625
self._entries_to_current_state(new_entries)
2532
2626
self._parents = [rev_id for rev_id, tree in trees]
2533
2627
self._ghosts = list(ghosts)
2534
self._header_state = DirState.IN_MEMORY_MODIFIED
2535
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2628
self._mark_modified(header_modified=True)
2536
2629
self._id_index = id_index
2538
2631
def _sort_entries(self, entry_list):
2542
2635
try to keep everything in sorted blocks all the time, but sometimes
2543
2636
it's easier to sort after the fact.
2638
# When sorting, we usually have 10x more entries than directories. (69k
2639
# total entries, 4k directories). So cache the results of splitting.
2640
# Saving time and objects. Also, use StaticTuple to avoid putting all
2641
# of these object into python's garbage collector.
2643
def _key(entry, _split_dirs=split_dirs, _st=static_tuple.StaticTuple):
2546
2644
# sort by: directory parts, file name, file id
2547
return entry[0][0].split('/'), entry[0][1], entry[0][2]
2645
dirpath, fname, file_id = entry[0]
2647
split = _split_dirs[dirpath]
2649
split = _st.from_sequence(dirpath.split('/'))
2650
_split_dirs[dirpath] = split
2651
return _st(split, fname, file_id)
2548
2652
return sorted(entry_list, key=_key)
2550
2654
def set_state_from_inventory(self, new_inv):
2675
2779
current_old[0][1].decode('utf8'))
2676
2780
self._make_absent(current_old)
2677
2781
current_old = advance(old_iterator)
2678
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2782
self._mark_modified()
2679
2783
self._id_index = None
2680
2784
self._packed_stat_index = None
2682
2786
trace.mutter("set_state_from_inventory complete.")
2788
def set_state_from_scratch(self, working_inv, parent_trees, parent_ghosts):
    """Wipe the currently stored state and set it to something new.

    This is a hard-reset for the data we are working with.

    :param working_inv: passed on to set_state_from_inventory.
    :param parent_trees: passed on to set_parent_trees as its trees.
    :param parent_ghosts: passed on to set_parent_trees as its ghosts.
    """
    # Technically, we really want a write lock, but until we write, we
    # don't really need it.
    self._requires_lock()
    # root dir and root dir contents with no children. We have to have a
    # root for set_state_from_inventory to work correctly.
    empty_root = (('', '', inventory.ROOT_ID),
                  [('d', '', 0, False, DirState.NULLSTAT)])
    empty_tree_dirblocks = [('', [empty_root]), ('', [])]
    # Order matters: reset to the empty tree, then load the working
    # inventory, then attach the parent trees.
    self._set_data([], empty_tree_dirblocks)
    self.set_state_from_inventory(working_inv)
    self.set_parent_trees(parent_trees, parent_ghosts)
2684
2805
def _make_absent(self, current_old):
2685
2806
"""Mark current_old - an entry - as absent for tree 0.
2730
2851
if update_tree_details[0][0] == 'a': # absent
2731
2852
raise AssertionError('bad row %r' % (update_tree_details,))
2732
2853
update_tree_details[0] = DirState.NULL_PARENT_DETAILS
2733
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2854
self._mark_modified()
2734
2855
return last_reference
2736
2857
def update_minimal(self, key, minikind, executable=False, fingerprint='',
2905
3026
if not present:
2906
3027
self._dirblocks.insert(block_index, (subdir_key[0], []))
2908
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
3029
self._mark_modified()
2910
3031
def _maybe_remove_row(self, block, index, id_index):
2911
3032
"""Remove index if it is absent or relocated across the row.
3199
3321
entry[1][0] = ('f', '', stat_value.st_size,
3200
3322
executable, DirState.NULLSTAT)
3323
worth_saving = False
3201
3324
elif minikind == 'd':
3202
3325
link_or_sha1 = None
3203
3326
entry[1][0] = ('d', '', 0, False, packed_stat)
3209
3332
state._get_block_entry_index(entry[0][0], entry[0][1], 0)
3210
3333
state._ensure_block(block_index, entry_index,
3211
3334
osutils.pathjoin(entry[0][0], entry[0][1]))
3336
worth_saving = False
3212
3337
elif minikind == 'l':
3338
if saved_minikind == 'l':
3339
worth_saving = False
3213
3340
link_or_sha1 = state._read_link(abspath, saved_link_or_sha1)
3214
3341
if state._cutoff_time is None:
3215
3342
state._sha_cutoff_time()
3221
3348
entry[1][0] = ('l', '', stat_value.st_size,
3222
3349
False, DirState.NULLSTAT)
3223
state._dirblock_state = DirState.IN_MEMORY_MODIFIED
3351
state._mark_modified([entry])
3224
3352
return link_or_sha1