lines by NL. The field delimiters are omitted in the grammar, line delimiters
are not - this is done for clarity of reading. All string data is in utf8.

MINIKIND = "f" | "d" | "l" | "a" | "r" | "t";
WHOLE_NUMBER = {digit}, digit;
REVISION_ID = a non-empty utf8 string;

dirstate format = header line, full checksum, row count, parent details,
    ghost_details, entries;
header line = "#bazaar dirstate flat format 3", NL;
full checksum = "crc32: ", ["-"], WHOLE_NUMBER, NL;
row count = "num_entries: ", WHOLE_NUMBER, NL;
parent_details = WHOLE NUMBER, {REVISION_ID}* NL;
ghost_details = WHOLE NUMBER, {REVISION_ID}*, NL;

entry = entry_key, current_entry_details, {parent_entry_details};
entry_key = dirname, basename, fileid;
current_entry_details = common_entry_details, working_entry_details;
parent_entry_details = common_entry_details, history_entry_details;
common_entry_details = MINIKIND, fingerprint, size, executable
working_entry_details = packed_stat
history_entry_details = REVISION_ID;

fingerprint = a nonempty utf8 sequence with meaning defined by minikind.

Given this definition, the following is useful to know::

    entry (aka row) - all the data for a given key.
    entry[0]: The key (dirname, basename, fileid)
    entry[1]: The tree(s) data for this path and id combination.
    entry[1][0]: The current tree
    entry[1][1]: The second tree

For an entry for a tree, we have (using tree 0 - the current tree - to
demonstrate)::

    entry[1][0][0]: minikind
    entry[1][0][1]: fingerprint
    entry[1][0][3]: executable
    entry[1][0][4]: packed_stat

and for a parent tree (e.g. tree 1)::

    entry[1][1][4]: revision_id
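To make the indexing above concrete, here is a hedged sketch of one in-memory
entry with invented values (not data from a real dirstate)::

    entry = (
        ('dir', 'file.txt', 'file-id-1'),      # entry[0]: (dirname, basename, fileid)
        [                                      # entry[1]: one details tuple per tree
            ('f', 'sha1-of-content', 12, False, 'packed-stat'),    # entry[1][0]: current tree
            ('f', 'sha1-of-content', 12, False, 'revision-id-1'),  # entry[1][1]: parent tree
        ],
    )
    minikind = entry[1][0][0]       # 'f' for a file
    fingerprint = entry[1][0][1]    # e.g. a sha1 for files
    executable = entry[1][0][3]
    packed_stat = entry[1][0][4]    # working tree details only
    revision_id = entry[1][1][4]    # parent tree details only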
There may be multiple rows at the root, one per id present in the root, so the
in memory root row is now::

    self._dirblocks[0] -> ('', [entry ...]),

and the entries in there are::

    entries[0][2]: file_id
    entries[1][0]: The tree data for the current tree for this fileid at /

'r' is a relocated entry: This path is not present in this tree with this
id, but the id can be found at another location. The fingerprint is
used to point to the target location.
'a' is an absent entry: In that tree the id is not present at this path.
'd' is a directory entry: This path in this tree is a directory with the
current file id. There is no fingerprint for directories.
'f' is a file entry: As for directory, but it's a file. The fingerprint is
the sha1 value of the file's canonical form, i.e. after any read
filters have been applied to the convenience form stored in the working
tree.
'l' is a symlink entry: As for directory, but a symlink. The fingerprint is
the link target.
't' is a reference to a nested subtree; the fingerprint is the referenced
revision.
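As a compact reference, here is a hedged sketch of the minikind/kind
correspondence described above (it mirrors, rather than quotes, the lookup
tables such as DirState._kind_to_minikind used later in this module)::

    _minikind_to_kind = {
        'f': 'file',
        'd': 'directory',
        'l': 'symlink',
        'a': 'absent',
        'r': 'relocated',
        't': 'tree-reference',
    }
    _kind_to_minikind = dict((kind, mk) for mk, kind in _minikind_to_kind.items())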
The entries on disk and in memory are ordered according to the following keys::
    directory, as a list of components
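As a hedged sketch (an assumption based on the entry_key structure above, not
a quote of the module's own comparators such as cmp_by_dirs), a sort key in
that spirit could be::

    def entry_sort_key(entry_key):
        dirname, basename, file_id = entry_key
        # directory as a list of components, then the rest of the key
        return (dirname.split('/'), basename, file_id)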
--- Format 1 had the following different definition: ---

rows = dirname, NULL, basename, NULL, MINIKIND, NULL, fileid_utf8, NULL,
    WHOLE NUMBER (* size *), NULL, packed stat, NULL, sha1|symlink target,
PARENT ROW = NULL, revision_utf8, NULL, MINIKIND, NULL, dirname, NULL,
    basename, NULL, WHOLE NUMBER (* size *), NULL, "y" | "n", NULL,

PARENT ROWs are emitted for every parent that is not in the ghosts details
line. That is, if the parents are foo, bar, baz, and the ghosts are bar, then
each row will have a PARENT ROW for foo and baz, but not for bar.
445
403
self._last_block_index = None
446
404
self._last_entry_index = None
447
# The set of known hash changes
448
self._known_hash_changes = set()
449
# How many hash changed entries can we have without saving
450
self._worth_saving_limit = worth_saving_limit
452
406
def __repr__(self):
453
407
return "%s(%r)" % \
454
408
(self.__class__.__name__, self._filename)
456
def _mark_modified(self, hash_changed_entries=None, header_modified=False):
457
"""Mark this dirstate as modified.
459
:param hash_changed_entries: if non-None, mark just these entries as
460
having their hash modified.
461
:param header_modified: mark the header modified as well, not just the
464
#trace.mutter_callsite(3, "modified hash entries: %s", hash_changed_entries)
465
if hash_changed_entries:
466
self._known_hash_changes.update([e[0] for e in hash_changed_entries])
467
if self._dirblock_state in (DirState.NOT_IN_MEMORY,
468
DirState.IN_MEMORY_UNMODIFIED):
469
# If the dirstate is already marked as IN_MEMORY_MODIFIED, then
470
# that takes precedence.
471
self._dirblock_state = DirState.IN_MEMORY_HASH_MODIFIED
473
# TODO: Since we now have an IN_MEMORY_HASH_MODIFIED state, we
474
# should fail noisily if someone tries to set
475
# IN_MEMORY_MODIFIED but we don't have a write-lock!
476
# We don't know exactly what changed so disable smart saving
477
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
479
self._header_state = DirState.IN_MEMORY_MODIFIED
481
def _mark_unmodified(self):
482
"""Mark this dirstate as unmodified."""
483
self._header_state = DirState.IN_MEMORY_UNMODIFIED
484
self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
485
self._known_hash_changes = set()
487
410
def add(self, path, file_id, kind, stat, fingerprint):
488
411
"""Add a path to be tracked.
1347
def _check_delta_is_valid(self, delta):
1348
return list(inventory._check_delta_unique_ids(
1349
inventory._check_delta_unique_old_paths(
1350
inventory._check_delta_unique_new_paths(
1351
inventory._check_delta_ids_match_entry(
1352
inventory._check_delta_ids_are_valid(
1353
inventory._check_delta_new_path_entry_both_or_None(delta)))))))
1355
1270
def update_by_delta(self, delta):
1356
1271
"""Apply an inventory delta to the dirstate for tree 0
1358
This is the workhorse for apply_inventory_delta in dirstate based
1361
1273
:param delta: An inventory delta. See Inventory.apply_delta for
1364
1276
self._read_dirblocks_if_needed()
1365
encode = cache_utf8.encode
1366
1277
insertions = {}
1368
# Accumulate parent references (path_utf8, id), to check for parentless
1369
# items or items placed under files/links/tree-references. We get
1370
# references from every item in the delta that is not a deletion and
1371
# is not itself the root.
1373
# Added ids must not be in the dirstate already. This set holds those
1376
# This loop transforms the delta to single atomic operations that can
1377
# be executed and validated.
1378
delta = sorted(self._check_delta_is_valid(delta), reverse=True)
1379
for old_path, new_path, file_id, inv_entry in delta:
1279
for old_path, new_path, file_id, inv_entry in sorted(delta, reverse=True):
1380
1280
if (file_id in insertions) or (file_id in removals):
1381
self._raise_invalid(old_path or new_path, file_id,
1281
raise AssertionError("repeated file id in delta %r" % (file_id,))
1383
1282
if old_path is not None:
1384
1283
old_path = old_path.encode('utf-8')
1385
1284
removals[file_id] = old_path
1387
new_ids.add(file_id)
1388
1285
if new_path is not None:
1389
if inv_entry is None:
1390
self._raise_invalid(new_path, file_id,
1391
"new_path with no entry")
1392
1286
new_path = new_path.encode('utf-8')
1393
dirname_utf8, basename = osutils.split(new_path)
1395
parents.add((dirname_utf8, inv_entry.parent_id))
1396
key = (dirname_utf8, basename, file_id)
1287
dirname, basename = osutils.split(new_path)
1288
key = (dirname, basename, file_id)
1397
1289
minikind = DirState._kind_to_minikind[inv_entry.kind]
1398
1290
if minikind == 't':
1399
fingerprint = inv_entry.reference_revision or ''
1291
fingerprint = inv_entry.reference_revision
1401
1293
fingerprint = ''
1402
1294
insertions[file_id] = (key, minikind, inv_entry.executable,
1411
1303
minikind = child[1][0][0]
1412
1304
fingerprint = child[1][0][4]
1413
1305
executable = child[1][0][3]
1414
old_child_path = osutils.pathjoin(child_dirname,
1306
old_child_path = osutils.pathjoin(child[0][0],
1416
1308
removals[child[0][2]] = old_child_path
1417
1309
child_suffix = child_dirname[len(old_path):]
1418
1310
new_child_dirname = (new_path + child_suffix)
1419
1311
key = (new_child_dirname, child_basename, child[0][2])
1420
new_child_path = osutils.pathjoin(new_child_dirname,
1312
new_child_path = os.path.join(new_child_dirname,
1422
1314
insertions[child[0][2]] = (key, minikind, executable,
1423
1315
fingerprint, new_child_path)
1424
self._check_delta_ids_absent(new_ids, delta, 0)
1426
self._apply_removals(removals.iteritems())
1427
self._apply_insertions(insertions.values())
1429
self._after_delta_check_parents(parents, 0)
1430
except errors.BzrError, e:
1431
self._changes_aborted = True
1432
if 'integrity error' not in str(e):
1434
# _get_entry raises BzrError when a request is inconsistent; we
1435
# want such errors to be shown as InconsistentDelta - and that
1436
# fits the behaviour we trigger.
1437
raise errors.InconsistentDeltaDelta(delta,
1438
"error from _get_entry. %s" % (e,))
1316
self._apply_removals(removals.values())
1317
self._apply_insertions(insertions.values())
1440
1319
def _apply_removals(self, removals):
1441
for file_id, path in sorted(removals, reverse=True,
1442
key=operator.itemgetter(1)):
1320
for path in sorted(removals, reverse=True):
1443
1321
dirname, basename = osutils.split(path)
1444
1322
block_i, entry_i, d_present, f_present = \
1445
1323
self._get_block_entry_index(dirname, basename, 0)
1447
entry = self._dirblocks[block_i][1][entry_i]
1449
self._raise_invalid(path, file_id,
1450
"Wrong path for old path.")
1451
if not f_present or entry[1][0][0] in 'ar':
1452
self._raise_invalid(path, file_id,
1453
"Wrong path for old path.")
1454
if file_id != entry[0][2]:
1455
self._raise_invalid(path, file_id,
1456
"Attempt to remove path has wrong id - found %r."
1324
entry = self._dirblocks[block_i][1][entry_i]
1458
1325
self._make_absent(entry)
1459
1326
# See if we have a malformed delta: deleting a directory must not
1460
1327
# leave crud behind. This increases the number of bisects needed
1526
1392
# At the same time, to reduce interface friction we convert the input
1527
1393
# inventory entries to dirstate.
1528
1394
root_only = ('', '')
1529
# Accumulate parent references (path_utf8, id), to check for parentless
1530
# items or items placed under files/links/tree-references. We get
1531
# references from every item in the delta that is not a deletion and
1532
# is not itself the root.
1534
# Added ids must not be in the dirstate already. This set holds those
1537
1395
for old_path, new_path, file_id, inv_entry in delta:
1538
if inv_entry is not None and file_id != inv_entry.file_id:
1539
self._raise_invalid(new_path, file_id,
1540
"mismatched entry file_id %r" % inv_entry)
1541
if new_path is None:
1542
new_path_utf8 = None
1544
if inv_entry is None:
1545
self._raise_invalid(new_path, file_id,
1546
"new_path with no entry")
1547
new_path_utf8 = encode(new_path)
1548
# note the parent for validation
1549
dirname_utf8, basename_utf8 = osutils.split(new_path_utf8)
1551
parents.add((dirname_utf8, inv_entry.parent_id))
1552
if old_path is None:
1553
old_path_utf8 = None
1555
old_path_utf8 = encode(old_path)
1556
if old_path is None:
1557
adds.append((None, new_path_utf8, file_id,
1396
if old_path is None:
1397
adds.append((None, encode(new_path), file_id,
1558
1398
inv_to_entry(inv_entry), True))
1559
new_ids.add(file_id)
1560
1399
elif new_path is None:
1561
deletes.append((old_path_utf8, None, file_id, None, True))
1562
elif (old_path, new_path) == root_only:
1563
# change things in-place
1564
# Note: the case of a parent directory changing its file_id
1565
# tends to break optimizations here, because officially
1566
# the file has actually been moved, it just happens to
1567
# end up at the same path. If we can figure out how to
1568
# handle that case, we can avoid a lot of add+delete
1569
# pairs for objects that stay put.
1570
# elif old_path == new_path:
1571
changes.append((old_path_utf8, new_path_utf8, file_id,
1572
inv_to_entry(inv_entry)))
1400
deletes.append((encode(old_path), None, file_id, None, True))
1401
elif (old_path, new_path) != root_only:
1575
1403
# Because renames must preserve their children we must have
1576
1404
# processed all relocations and removes beforehand. The sort
1585
1413
# for 'r' items on every pass.
1586
1414
self._update_basis_apply_deletes(deletes)
1416
new_path_utf8 = encode(new_path)
1588
1417
# Split into an add/delete pair recursively.
1589
adds.append((old_path_utf8, new_path_utf8, file_id,
1590
inv_to_entry(inv_entry), False))
1418
adds.append((None, new_path_utf8, file_id,
1419
inv_to_entry(inv_entry), False))
1591
1420
# Expunge deletes that we've seen so that deleted/renamed
1592
1421
# children of a renamed directory are handled correctly.
1593
new_deletes = reversed(list(
1594
self._iter_child_entries(1, old_path_utf8)))
1422
new_deletes = reversed(list(self._iter_child_entries(1,
1595
1424
# Remove the current contents of the tree at orig_path, and
1596
1425
# reinsert at the correct new path.
1597
1426
for entry in new_deletes:
1598
child_dirname, child_basename, child_file_id = entry[0]
1600
source_path = child_dirname + '/' + child_basename
1428
source_path = entry[0][0] + '/' + entry[0][1]
1602
source_path = child_basename
1430
source_path = entry[0][1]
1603
1431
if new_path_utf8:
1604
1432
target_path = new_path_utf8 + source_path[len(old_path):]
1606
1434
if old_path == '':
1607
1435
raise AssertionError("cannot rename directory to"
1609
1437
target_path = source_path[len(old_path) + 1:]
1610
1438
adds.append((None, target_path, entry[0][2], entry[1][1], False))
1611
1439
deletes.append(
1612
1440
(source_path, target_path, entry[0][2], None, False))
1613
deletes.append((old_path_utf8, new_path, file_id, None, False))
1614
self._check_delta_ids_absent(new_ids, delta, 1)
1616
# Finish expunging deletes/first half of renames.
1617
self._update_basis_apply_deletes(deletes)
1618
# Reinstate second half of renames and new paths.
1619
self._update_basis_apply_adds(adds)
1620
# Apply in-situ changes.
1621
self._update_basis_apply_changes(changes)
1623
self._after_delta_check_parents(parents, 1)
1624
except errors.BzrError, e:
1625
self._changes_aborted = True
1626
if 'integrity error' not in str(e):
1628
# _get_entry raises BzrError when a request is inconsistent; we
1629
# want such errors to be shown as InconsistentDelta - and that
1630
# fits the behaviour we trigger.
1631
raise errors.InconsistentDeltaDelta(delta,
1632
"error from _get_entry. %s" % (e,))
1634
self._mark_modified(header_modified=True)
1442
(encode(old_path), new_path, file_id, None, False))
1444
# changes to just the root should not require remove/insertion
1446
changes.append((encode(old_path), encode(new_path), file_id,
1447
inv_to_entry(inv_entry)))
1449
# Finish expunging deletes/first half of renames.
1450
self._update_basis_apply_deletes(deletes)
1451
# Reinstate second half of renames and new paths.
1452
self._update_basis_apply_adds(adds)
1453
# Apply in-situ changes.
1454
self._update_basis_apply_changes(changes)
1456
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1457
self._header_state = DirState.IN_MEMORY_MODIFIED
1635
1458
self._id_index = None
1638
def _check_delta_ids_absent(self, new_ids, delta, tree_index):
1639
"""Check that none of the file_ids in new_ids are present in a tree."""
1642
id_index = self._get_id_index()
1643
for file_id in new_ids:
1644
for key in id_index.get(file_id, ()):
1645
block_i, entry_i, d_present, f_present = \
1646
self._get_block_entry_index(key[0], key[1], tree_index)
1648
# In a different tree
1650
entry = self._dirblocks[block_i][1][entry_i]
1651
if entry[0][2] != file_id:
1652
# Different file_id, so not what we want.
1654
self._raise_invalid(("%s/%s" % key[0:2]).decode('utf8'), file_id,
1655
"This file_id is new in the delta but already present in "
1658
def _raise_invalid(self, path, file_id, reason):
1659
self._changes_aborted = True
1660
raise errors.InconsistentDelta(path, file_id, reason)
1662
1461
def _update_basis_apply_adds(self, adds):
1663
1462
"""Apply a sequence of adds to tree 1 during update_basis_by_delta.
1673
1472
# Adds are accumulated partly from renames, so can be in any input
1674
1473
# order - sort it.
1675
# TODO: we may want to sort in dirblocks order. That way each entry
1676
# will end up in the same directory, allowing the _get_entry
1677
# fast-path for looking up 2 items in the same dir to work.
1678
adds.sort(key=lambda x: x[1])
1679
1475
# adds is now in lexicographic order, which places all parents before
1680
1476
# their children, so we can process it linearly.
1682
st = static_tuple.StaticTuple
1683
1478
for old_path, new_path, file_id, new_details, real_add in adds:
1684
dirname, basename = osutils.split(new_path)
1685
entry_key = st(dirname, basename, file_id)
1686
block_index, present = self._find_block_index_from_key(entry_key)
1688
self._raise_invalid(new_path, file_id,
1689
"Unable to find block for this record."
1690
" Was the parent added?")
1691
block = self._dirblocks[block_index][1]
1692
entry_index, present = self._find_entry_index(entry_key, block)
1694
if old_path is not None:
1695
self._raise_invalid(new_path, file_id,
1696
'considered a real add but still had old_path at %s'
1699
entry = block[entry_index]
1700
basis_kind = entry[1][1][0]
1701
if basis_kind == 'a':
1702
entry[1][1] = new_details
1703
elif basis_kind == 'r':
1704
raise NotImplementedError()
1706
self._raise_invalid(new_path, file_id,
1707
"An entry was marked as a new add"
1708
" but the basis target already existed")
1710
# The exact key was not found in the block. However, we need to
1711
# check if there is a key next to us that would have matched.
1712
# We only need to check 2 locations, because there are only 2
1714
for maybe_index in range(entry_index-1, entry_index+1):
1715
if maybe_index < 0 or maybe_index >= len(block):
1717
maybe_entry = block[maybe_index]
1718
if maybe_entry[0][:2] != (dirname, basename):
1719
# Just a random neighbor
1721
if maybe_entry[0][2] == file_id:
1722
raise AssertionError(
1723
'_find_entry_index did not find a key match'
1724
' but walking the data did, for %s'
1726
basis_kind = maybe_entry[1][1][0]
1727
if basis_kind not in 'ar':
1728
self._raise_invalid(new_path, file_id,
1729
"we have an add record for path, but the path"
1730
" is already present with another file_id %s"
1731
% (maybe_entry[0][2],))
1733
entry = (entry_key, [DirState.NULL_PARENT_DETAILS,
1735
block.insert(entry_index, entry)
1737
active_kind = entry[1][0][0]
1738
if active_kind == 'a':
1739
# The active record shows up as absent; this could be genuine,
1740
# or it could be present at some other location. We need to
1742
id_index = self._get_id_index()
1743
# The id_index may not be perfectly accurate for tree1, because
1744
# we haven't been keeping it updated. However, it should be
1745
# fine for tree0, and that gives us enough info for what we
1747
keys = id_index.get(file_id, ())
1749
block_i, entry_i, d_present, f_present = \
1750
self._get_block_entry_index(key[0], key[1], 0)
1753
active_entry = self._dirblocks[block_i][1][entry_i]
1754
if (active_entry[0][2] != file_id):
1755
# Some other file is at this path, we don't need to
1758
real_active_kind = active_entry[1][0][0]
1759
if real_active_kind in 'ar':
1760
# We found a record, which was not *this* record,
1761
# which matches the file_id, but is not actually
1762
# present. Something seems *really* wrong.
1763
self._raise_invalid(new_path, file_id,
1764
"We found a tree0 entry that doesnt make sense")
1765
# Now, we've found a tree0 entry which matches the file_id
1766
# but is at a different location. So update them to be
1768
active_dir, active_name = active_entry[0][:2]
1770
active_path = active_dir + '/' + active_name
1772
active_path = active_name
1773
active_entry[1][1] = st('r', new_path, 0, False, '')
1774
entry[1][0] = st('r', active_path, 0, False, '')
1775
elif active_kind == 'r':
1776
raise NotImplementedError()
1778
new_kind = new_details[0]
1780
self._ensure_block(block_index, entry_index, new_path)
1479
# the entry for this file_id must be in tree 0.
1480
entry = self._get_entry(0, file_id, new_path)
1481
if entry[0] is None or entry[0][2] != file_id:
1482
self._changes_aborted = True
1483
raise errors.InconsistentDelta(new_path, file_id,
1484
'working tree does not contain new entry')
1485
if real_add and entry[1][1][0] not in absent:
1486
self._changes_aborted = True
1487
raise errors.InconsistentDelta(new_path, file_id,
1488
'The entry was considered to be a genuinely new record,'
1489
' but there was already an old record for it.')
1490
# We don't need to update the target of an 'r' because the handling
1491
# of renames turns all 'r' situations into a delete at the original
1493
entry[1][1] = new_details
1782
1495
def _update_basis_apply_changes(self, changes):
1783
1496
"""Apply a sequence of changes to tree 1 during update_basis_by_delta.
1809
1528
null = DirState.NULL_PARENT_DETAILS
1810
1529
for old_path, new_path, file_id, _, real_delete in deletes:
1811
1530
if real_delete != (new_path is None):
1812
self._raise_invalid(old_path, file_id, "bad delete delta")
1531
raise AssertionError("bad delete delta")
1813
1532
# the entry for this file_id must be in tree 1.
1814
1533
dirname, basename = osutils.split(old_path)
1815
1534
block_index, entry_index, dir_present, file_present = \
1816
1535
self._get_block_entry_index(dirname, basename, 1)
1817
1536
if not file_present:
1818
self._raise_invalid(old_path, file_id,
1537
self._changes_aborted = True
1538
raise errors.InconsistentDelta(old_path, file_id,
1819
1539
'basis tree does not contain removed entry')
1820
1540
entry = self._dirblocks[block_index][1][entry_index]
1821
# The state of the entry in the 'active' WT
1822
active_kind = entry[1][0][0]
1823
1541
if entry[0][2] != file_id:
1824
self._raise_invalid(old_path, file_id,
1542
self._changes_aborted = True
1543
raise errors.InconsistentDelta(old_path, file_id,
1825
1544
'mismatched file_id in tree 1')
1827
old_kind = entry[1][1][0]
1828
if active_kind in 'ar':
1829
# The active tree doesn't have this file_id.
1830
# The basis tree is changing this record. If this is a
1831
# rename, then we don't want the record here at all
1832
# anymore. If it is just an in-place change, we want the
1833
# record here, but we'll add it if we need to. So we just
1835
if active_kind == 'r':
1836
active_path = entry[1][0][1]
1837
active_entry = self._get_entry(0, file_id, active_path)
1838
if active_entry[1][1][0] != 'r':
1839
self._raise_invalid(old_path, file_id,
1840
"Dirstate did not have matching rename entries")
1841
elif active_entry[1][0][0] in 'ar':
1842
self._raise_invalid(old_path, file_id,
1843
"Dirstate had a rename pointing at an inactive"
1845
active_entry[1][1] = null
1546
if entry[1][0][0] != 'a':
1547
self._changes_aborted = True
1548
raise errors.InconsistentDelta(old_path, file_id,
1549
'This was marked as a real delete, but the WT state'
1550
' claims that it still exists and is versioned.')
1846
1551
del self._dirblocks[block_index][1][entry_index]
1848
# This was a directory, and the active tree says it
1849
# doesn't exist, and now the basis tree says it doesn't
1850
# exist. Remove its dirblock if present
1852
present) = self._find_block_index_from_key(
1855
dir_block = self._dirblocks[dir_block_index][1]
1857
# This entry is empty, go ahead and just remove it
1858
del self._dirblocks[dir_block_index]
1860
# There is still an active record, so just mark this
1863
block_i, entry_i, d_present, f_present = \
1864
self._get_block_entry_index(old_path, '', 1)
1866
dir_block = self._dirblocks[block_i][1]
1867
for child_entry in dir_block:
1868
child_basis_kind = child_entry[1][1][0]
1869
if child_basis_kind not in 'ar':
1870
self._raise_invalid(old_path, file_id,
1871
"The file id was deleted but its children were "
1874
def _after_delta_check_parents(self, parents, index):
1875
"""Check that parents required by the delta are all intact.
1877
:param parents: An iterable of (path_utf8, file_id) tuples which are
1878
required to be present in tree 'index' at path_utf8 with id file_id
1880
:param index: The column in the dirstate to check for parents in.
1882
for dirname_utf8, file_id in parents:
1883
# Get the entry - this ensures that file_id, dirname_utf8 exists and
1884
# has the right file id.
1885
entry = self._get_entry(index, file_id, dirname_utf8)
1886
if entry[1] is None:
1887
self._raise_invalid(dirname_utf8.decode('utf8'),
1888
file_id, "This parent is not present.")
1889
# Parents of things must be directories
1890
if entry[1][index][0] != 'd':
1891
self._raise_invalid(dirname_utf8.decode('utf8'),
1892
file_id, "This parent is not a directory.")
1553
if entry[1][0][0] == 'a':
1554
self._changes_aborted = True
1555
raise errors.InconsistentDelta(old_path, file_id,
1556
'The entry was considered a rename, but the source path'
1557
' is marked as absent.')
1558
# For whatever reason, we were asked to rename an entry
1559
# that was originally marked as deleted. This could be
1560
# because we are renaming the parent directory, and the WT
1561
# current state has the file marked as deleted.
1562
elif entry[1][0][0] == 'r':
1563
# implement the rename
1564
del self._dirblocks[block_index][1][entry_index]
1566
# it is being resurrected here, so blank it out temporarily.
1567
self._dirblocks[block_index][1][entry_index][1][1] = null
1894
1569
def _observed_sha1(self, entry, sha1, stat_value,
1895
1570
_stat_to_minikind=_stat_to_minikind, _pack_stat=pack_stat):
2327
1995
def _get_id_index(self):
2328
"""Get an id index of self._dirblocks.
2330
This maps from file_id => [(directory, name, file_id)] entries where
2331
that file_id appears in one of the trees.
1996
"""Get an id index of self._dirblocks."""
2333
1997
if self._id_index is None:
2335
1999
for key, tree_details in self._iter_entries():
2336
self._add_to_id_index(id_index, key)
2000
id_index.setdefault(key[2], set()).add(key)
2337
2001
self._id_index = id_index
2338
2002
return self._id_index
2340
def _add_to_id_index(self, id_index, entry_key):
2341
"""Add this entry to the _id_index mapping."""
2342
# This code used to use a set for every entry in the id_index. However,
2343
# it is *rare* to have more than one entry. So a set is large
2344
# overkill. And even when we do, we won't ever have more than the
2345
# number of parent trees. Which is still a small number (rarely >2). As
2346
# such, we use a simple tuple, and do our own uniqueness checks. While
2347
# the 'in' check is O(N), since N is nicely bounded it shouldn't ever
2348
# cause quadratic failure.
2349
file_id = entry_key[2]
2350
entry_key = static_tuple.StaticTuple.from_sequence(entry_key)
2351
if file_id not in id_index:
2352
id_index[file_id] = static_tuple.StaticTuple(entry_key,)
2354
entry_keys = id_index[file_id]
2355
if entry_key not in entry_keys:
2356
id_index[file_id] = entry_keys + (entry_key,)
2358
def _remove_from_id_index(self, id_index, entry_key):
2359
"""Remove this entry from the _id_index mapping.
2361
It is a programming error to call this when the entry_key is not
2364
file_id = entry_key[2]
2365
entry_keys = list(id_index[file_id])
2366
entry_keys.remove(entry_key)
2367
id_index[file_id] = static_tuple.StaticTuple.from_sequence(entry_keys)
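# A hedged usage sketch (not part of DirState itself) of the tuple-based
# id_index scheme maintained by the two helpers above; plain tuples stand in
# for StaticTuple and the values are illustrative only:
#
#   id_index = {}
#   key = ('dir', 'name', 'file-id-1')
#   existing = id_index.get(key[2], ())
#   if key not in existing:            # O(N) membership scan, N rarely > 2
#       id_index[key[2]] = existing + (key,)
#   remaining = list(id_index[key[2]])
#   remaining.remove(key)              # ValueError if the key was never added
#   id_index[key[2]] = tuple(remaining)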
2369
2004
def _get_output_lines(self, lines):
2370
2005
"""Format lines for final output.
2725
2317
new_details = []
2726
2318
for lookup_index in xrange(tree_index):
2727
2319
# boundary case: this is the first occurrence of file_id
2728
# so there are no id_indexes, possibly take this out of
2320
# so there are no id_indexes, possibly take this out of
2730
if not len(entry_keys):
2322
if not len(id_index[file_id]):
2731
2323
new_details.append(DirState.NULL_PARENT_DETAILS)
2733
2325
# grab any one entry, use it to find the right path.
2734
a_key = iter(entry_keys).next()
2326
# TODO: optimise this to reduce memory use in highly
2327
# fragmented situations by reusing the relocation
2329
a_key = iter(id_index[file_id]).next()
2735
2330
if by_path[a_key][lookup_index][0] in ('r', 'a'):
2736
# it's a pointer or missing statement, use it as
2331
# it's a pointer or missing statement, use it as is.
2738
2332
new_details.append(by_path[a_key][lookup_index])
2740
2334
# we have the right key, make a pointer to it.
2741
2335
real_path = ('/'.join(a_key[0:2])).strip('/')
2742
new_details.append(st('r', real_path, 0, False,
2336
new_details.append(('r', real_path, 0, False, ''))
2744
2337
new_details.append(self._inv_entry_to_details(entry))
2745
2338
new_details.extend(new_location_suffix)
2746
2339
by_path[new_entry_key] = new_details
2747
self._add_to_id_index(id_index, new_entry_key)
2340
id_index[file_id].add(new_entry_key)
2748
2341
# --- end generation of full tree mappings
2750
2343
# sort and output all the entries
2889
2455
and new_entry_key[1:] < current_old[0][1:])):
2890
2456
# new comes before:
2891
2457
# add a entry for this and advance new
2893
trace.mutter("Inserting from new '%s'.",
2894
new_path_utf8.decode('utf8'))
2895
2458
self.update_minimal(new_entry_key, current_new_minikind,
2896
2459
executable=current_new[1].executable,
2897
path_utf8=new_path_utf8, fingerprint=fingerprint,
2460
path_utf8=new_path_utf8, fingerprint=fingerprint)
2899
2461
current_new = advance(new_iterator)
2901
2463
# we've advanced past the place where the old key would be,
2902
2464
# without seeing it in the new list. so it must be gone.
2904
trace.mutter("Deleting from old '%s/%s'.",
2905
current_old[0][0].decode('utf8'),
2906
current_old[0][1].decode('utf8'))
2907
2465
self._make_absent(current_old)
2908
2466
current_old = advance(old_iterator)
2909
self._mark_modified()
2467
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2910
2468
self._id_index = None
2911
2469
self._packed_stat_index = None
2913
trace.mutter("set_state_from_inventory complete.")
2915
def set_state_from_scratch(self, working_inv, parent_trees, parent_ghosts):
2916
"""Wipe the currently stored state and set it to something new.
2918
This is a hard-reset for the data we are working with.
2920
# Technically, we really want a write lock, but until we write, we
2921
# don't really need it.
2922
self._requires_lock()
2923
# root dir and root dir contents with no children. We have to have a
2924
# root for set_state_from_inventory to work correctly.
2925
empty_root = (('', '', inventory.ROOT_ID),
2926
[('d', '', 0, False, DirState.NULLSTAT)])
2927
empty_tree_dirblocks = [('', [empty_root]), ('', [])]
2928
self._set_data([], empty_tree_dirblocks)
2929
self.set_state_from_inventory(working_inv)
2930
self.set_parent_trees(parent_trees, parent_ghosts)
2932
2471
def _make_absent(self, current_old):
2933
2472
"""Mark current_old - an entry - as absent for tree 0.
3043
2562
# grab one of them and use it to generate parent
3044
2563
# relocation/absent entries.
3045
2564
new_entry = key, [new_details]
3046
# existing_keys can be changed as we iterate.
3047
for other_key in tuple(existing_keys):
2565
for other_key in existing_keys:
3048
2566
# change the record at other to be a pointer to this new
3049
2567
# record. The loop looks similar to the change to
3050
2568
# relocations when updating an existing record but it's not:
3051
2569
# the test for existing kinds is different: this can be
3052
2570
# factored out to a helper though.
3053
other_block_index, present = self._find_block_index_from_key(
3056
raise AssertionError('could not find block for %s' % (
3058
other_block = self._dirblocks[other_block_index][1]
3059
other_entry_index, present = self._find_entry_index(
3060
other_key, other_block)
3062
raise AssertionError(
3063
'update_minimal: could not find other entry for %s'
2571
other_block_index, present = self._find_block_index_from_key(other_key)
2573
raise AssertionError('could not find block for %s' % (other_key,))
2574
other_entry_index, present = self._find_entry_index(other_key,
2575
self._dirblocks[other_block_index][1])
2577
raise AssertionError('could not find entry for %s' % (other_key,))
3065
2578
if path_utf8 is None:
3066
2579
raise AssertionError('no path')
3067
# Turn this other location into a reference to the new
3068
# location. This also updates the aliased iterator
3069
# (current_old in set_state_from_inventory) so that the old
3070
# entry, if not already examined, is skipped over by that
3072
other_entry = other_block[other_entry_index]
3073
other_entry[1][0] = ('r', path_utf8, 0, False, '')
3074
if self._maybe_remove_row(other_block, other_entry_index,
3076
# If the row holding this was removed, we need to
3077
# recompute where this entry goes
3078
entry_index, _ = self._find_entry_index(key, block)
2580
self._dirblocks[other_block_index][1][other_entry_index][1][0] = \
2581
('r', path_utf8, 0, False, '')
3081
# adds a tuple to the new details for each column
3082
# - either by copying an existing relocation pointer inside that column
3083
# - or by creating a new pointer to the right row inside that column
3084
2583
num_present_parents = self._num_present_parents()
3085
if num_present_parents:
3086
# TODO: This re-evaluates the existing_keys set, do we need
3087
# to do that ourselves?
3088
other_key = list(existing_keys)[0]
3089
2584
for lookup_index in xrange(1, num_present_parents + 1):
3090
2585
# grab any one entry, use it to find the right path.
3091
2586
# TODO: optimise this to reduce memory use in highly
3475
2930
entry[1][0] = ('l', '', stat_value.st_size,
3476
2931
False, DirState.NULLSTAT)
3478
state._mark_modified([entry])
2932
state._dirblock_state = DirState.IN_MEMORY_MODIFIED
3479
2933
return link_or_sha1
2934
update_entry = py_update_entry
3482
2937
class ProcessEntryPython(object):
3484
__slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id",
2939
__slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",
3485
2940
"last_source_parent", "last_target_parent", "include_unchanged",
3486
"partial", "use_filesystem_for_exec", "utf8_decode",
3487
"searched_specific_files", "search_specific_files",
3488
"searched_exact_paths", "search_specific_file_parents", "seen_ids",
3489
"state", "source_index", "target_index", "want_unversioned", "tree"]
2941
"use_filesystem_for_exec", "utf8_decode", "searched_specific_files",
2942
"search_specific_files", "state", "source_index", "target_index",
2943
"want_unversioned", "tree"]
3491
2945
def __init__(self, include_unchanged, use_filesystem_for_exec,
3492
2946
search_specific_files, state, source_index, target_index,
3493
2947
want_unversioned, tree):
3494
2948
self.old_dirname_to_file_id = {}
3495
2949
self.new_dirname_to_file_id = {}
3496
# Are we doing a partial iter_changes?
3497
self.partial = search_specific_files != set([''])
2950
# Just a sentry, so that _process_entry can say that this
2951
# record is handled, but isn't interesting to process (unchanged)
2952
self.uninteresting = object()
3498
2953
# Using a list so that we can access the values and change them in
3499
2954
# nested scope. Each one is [path, file_id, entry]
3500
2955
self.last_source_parent = [None, None]
4156
3570
current_dir_info = dir_iterator.next()
4157
3571
except StopIteration:
4158
3572
current_dir_info = None
4159
for result in self._iter_specific_file_parents():
4162
def _iter_specific_file_parents(self):
4163
"""Iter over the specific file parents."""
4164
while self.search_specific_file_parents:
4165
# Process the parent directories for the paths we were iterating.
4166
# Even in extremely large trees this should be modest, so currently
4167
# no attempt is made to optimise.
4168
path_utf8 = self.search_specific_file_parents.pop()
4169
if osutils.is_inside_any(self.searched_specific_files, path_utf8):
4170
# We've examined this path.
4172
if path_utf8 in self.searched_exact_paths:
4173
# We've examined this path.
4175
path_entries = self.state._entries_for_path(path_utf8)
4176
# We need either one or two entries. If the path in
4177
# self.target_index has moved (so the entry in source_index is in
4178
# 'ar') then we need to also look for the entry for this path in
4179
# self.source_index, to output the appropriate delete-or-rename.
4180
selected_entries = []
4182
for candidate_entry in path_entries:
4183
# Find entries present in target at this path:
4184
if candidate_entry[1][self.target_index][0] not in 'ar':
4186
selected_entries.append(candidate_entry)
4187
# Find entries present in source at this path:
4188
elif (self.source_index is not None and
4189
candidate_entry[1][self.source_index][0] not in 'ar'):
4191
if candidate_entry[1][self.target_index][0] == 'a':
4192
# Deleted, emit it here.
4193
selected_entries.append(candidate_entry)
4195
# renamed, emit it when we process the directory it
4197
self.search_specific_file_parents.add(
4198
candidate_entry[1][self.target_index][1])
4200
raise AssertionError(
4201
"Missing entry for specific path parent %r, %r" % (
4202
path_utf8, path_entries))
4203
path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
4204
for entry in selected_entries:
4205
if entry[0][2] in self.seen_ids:
4207
result, changed = self._process_entry(entry, path_info)
4209
raise AssertionError(
4210
"Got entry<->path mismatch for specific path "
4211
"%r entry %r path_info %r " % (
4212
path_utf8, entry, path_info))
4213
# Only include changes - we're outside the user's requested
4216
self._gather_result_for_consistency(result)
4217
if (result[6][0] == 'directory' and
4218
result[6][1] != 'directory'):
4219
# This stopped being a directory, the old children have
4221
if entry[1][self.source_index][0] == 'r':
4222
# renamed, take the source path
4223
entry_path_utf8 = entry[1][self.source_index][1]
4225
entry_path_utf8 = path_utf8
4226
initial_key = (entry_path_utf8, '', '')
4227
block_index, _ = self.state._find_block_index_from_key(
4229
if block_index == 0:
4230
# The children of the root are in block index 1.
4232
current_block = None
4233
if block_index < len(self.state._dirblocks):
4234
current_block = self.state._dirblocks[block_index]
4235
if not osutils.is_inside(
4236
entry_path_utf8, current_block[0]):
4237
# No entries for this directory at all.
4238
current_block = None
4239
if current_block is not None:
4240
for entry in current_block[1]:
4241
if entry[1][self.source_index][0] in 'ar':
4242
# Not in the source tree, so doesn't have to be
4245
# Path of the entry itself.
4247
self.search_specific_file_parents.add(
4248
osutils.pathjoin(*entry[0][:2]))
4249
if changed or self.include_unchanged:
4251
self.searched_exact_paths.add(path_utf8)
4253
def _path_info(self, utf8_path, unicode_path):
4254
"""Generate path_info for unicode_path.
4256
:return: None if unicode_path does not exist, or a path_info tuple.
4258
abspath = self.tree.abspath(unicode_path)
4260
stat = os.lstat(abspath)
4262
if e.errno == errno.ENOENT:
4263
# the path does not exist.
4267
utf8_basename = utf8_path.rsplit('/', 1)[-1]
4268
dir_info = (utf8_path, utf8_basename,
4269
osutils.file_kind_from_stat_mode(stat.st_mode), stat,
4271
if dir_info[2] == 'directory':
4272
if self.tree._directory_is_tree_reference(
4274
self.root_dir_info = self.root_dir_info[:2] + \
4275
('tree-reference',) + self.root_dir_info[3:]
3573
_process_entry = ProcessEntryPython
4279
3576
# Try to load the compiled form if possible
4281
from bzrlib._dirstate_helpers_pyx import (
3578
from bzrlib._dirstate_helpers_c import (
3579
_read_dirblocks_c as _read_dirblocks,
3580
bisect_dirblock_c as bisect_dirblock,
3581
_bisect_path_left_c as _bisect_path_left,
3582
_bisect_path_right_c as _bisect_path_right,
3583
cmp_by_dirs_c as cmp_by_dirs,
4287
3584
ProcessEntryC as _process_entry,
4288
3585
update_entry as update_entry,
4290
except ImportError, e:
4291
osutils.failed_to_load_extension(e)
4292
3588
from bzrlib._dirstate_helpers_py import (
3589
_read_dirblocks_py as _read_dirblocks,
3590
bisect_dirblock_py as bisect_dirblock,
3591
_bisect_path_left_py as _bisect_path_left,
3592
_bisect_path_right_py as _bisect_path_right,
3593
cmp_by_dirs_py as cmp_by_dirs,
4299
# FIXME: It would be nice to be able to track moved lines so that the
4300
# corresponding python code can be moved to the _dirstate_helpers_py
4301
# module. I don't want to break the history for this important piece of
4302
# code so I left the code here -- vila 20090622
4303
update_entry = py_update_entry
4304
_process_entry = ProcessEntryPython