lines by NL. The field delimiters are omitted in the grammar, line delimiters
are not - this is done for clarity of reading. All string data is in utf8.
MINIKIND = "f" | "d" | "l" | "a" | "r" | "t";
WHOLE_NUMBER = {digit}, digit;
REVISION_ID = a non-empty utf8 string;

dirstate format = header line, full checksum, row count, parent details,
    ghost_details, entries;
header line = "#bazaar dirstate flat format 3", NL;
full checksum = "crc32: ", ["-"], WHOLE_NUMBER, NL;
row count = "num_entries: ", WHOLE_NUMBER, NL;
parent_details = WHOLE NUMBER, {REVISION_ID}* NL;
ghost_details = WHOLE NUMBER, {REVISION_ID}*, NL;

entry = entry_key, current_entry_details, {parent_entry_details};
entry_key = dirname, basename, fileid;
current_entry_details = common_entry_details, working_entry_details;
parent_entry_details = common_entry_details, history_entry_details;
common_entry_details = MINIKIND, fingerprint, size, executable;
working_entry_details = packed_stat;
history_entry_details = REVISION_ID;

fingerprint = a non-empty utf8 sequence with meaning defined by minikind.

Given this definition, the following is useful to know::

    entry (aka row) - all the data for a given key.
    entry[0]: The key (dirname, basename, fileid)
    entry[1]: The tree(s) data for this path and id combination.
    entry[1][0]: The current tree
    entry[1][1]: The second tree

For an entry for a tree, we have (using tree 0 - current tree) to demonstrate::

    entry[1][0][0]: minikind
    entry[1][0][1]: fingerprint
    entry[1][0][2]: size
    entry[1][0][3]: executable
    entry[1][0][4]: packed_stat

or, for a parent tree (non tree-0), the final field is the revision rather
than the stat::

    entry[1][1][4]: revision_id
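
As a purely illustrative example (the concrete values are made up, not taken
from a real tree), the in-memory entry for a file 'foo' in directory 'dir',
with one parent tree, would be shaped like::

    (('dir', 'foo', 'file-id-foo'),                       # entry[0], the key
     [('f', 'sha1-of-foo', 42, False, 'packed-stat'),     # entry[1][0], tree 0
      ('f', 'sha1-of-foo', 42, False, 'revision-id-1')])  # entry[1][1], tree 1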
There may be multiple rows at the root, one per id present in the root, so the
in memory root row is now::

    self._dirblocks[0] -> ('', [entry ...]),

and the entries in there are::

    entries[0][0]: ''
    entries[0][1]: ''
    entries[0][2]: file_id
    entries[1][0]: The tree data for the current tree for this fileid at /

'r' is a relocated entry: This path is not present in this tree with this
    id, but the id can be found at another location. The fingerprint is
    used to point to the target location.
'a' is an absent entry: In that tree the id is not present at this path.
'd' is a directory entry: This path in this tree is a directory with the
    current file id. There is no fingerprint for directories.
'f' is a file entry: As for directory, but it's a file. The fingerprint is
    the sha1 value of the file's canonical form, i.e. after any read
    filters have been applied to the convenience form stored in the working
    tree.
'l' is a symlink entry: As for directory, but a symlink. The fingerprint is
    the link target.
't' is a reference to a nested subtree; the fingerprint is the referenced
    revision.
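
A rough sketch of the kind <-> minikind correspondence maintained by the class
(see DirState._kind_to_minikind; 'a' and 'r' are dirstate bookkeeping states
rather than real file kinds)::

    'file'           -> 'f'
    'directory'      -> 'd'
    'symlink'        -> 'l'
    'tree-reference' -> 't'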
The entries on disk and in memory are ordered according to the following keys::

    directory, as a list of components
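
A minimal illustrative sort key for that ordering (the trailing key components
are an assumption here, not taken from this text; the real module uses
optimised helpers such as cmp_by_dirs and bisect_dirblock)::

    def _illustrative_sort_key(entry):
        dirname, basename, file_id = entry[0]
        # Comparing the directory as a list of components makes 'a-b' sort
        # after 'a/b', unlike a plain string comparison.
        return (dirname.split('/'), basename, file_id)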

--- Format 1 had the following different definition: ---

rows = dirname, NULL, basename, NULL, MINIKIND, NULL, fileid_utf8, NULL,
    WHOLE NUMBER (* size *), NULL, packed stat, NULL, sha1|symlink target,
    {PARENT ROW}
PARENT ROW = NULL, revision_utf8, NULL, MINIKIND, NULL, dirname, NULL,
    basename, NULL, WHOLE NUMBER (* size *), NULL, "y" | "n", NULL,
    SHA1

PARENT ROWs are emitted for every parent that is not in the ghosts details
line. That is, if the parents are foo, bar, baz, and the ghosts are bar, then
each row will have a PARENT ROW for foo and baz, but not for bar.
    self._last_block_index = None
    self._last_entry_index = None
    # The set of known hash changes
    self._known_hash_changes = set()
    # How many hash changed entries can we have without saving
    self._worth_saving_limit = worth_saving_limit

def __repr__(self):
    return "%s(%r)" % \
        (self.__class__.__name__, self._filename)

def _mark_modified(self, hash_changed_entries=None, header_modified=False):
    """Mark this dirstate as modified.

    :param hash_changed_entries: if non-None, mark just these entries as
        having their hash modified.
    :param header_modified: mark the header modified as well, not just the
        dirblocks.
    """
    #trace.mutter_callsite(3, "modified hash entries: %s", hash_changed_entries)
    if hash_changed_entries:
        self._known_hash_changes.update([e[0] for e in hash_changed_entries])
        if self._dirblock_state in (DirState.NOT_IN_MEMORY,
                                    DirState.IN_MEMORY_UNMODIFIED):
            # If the dirstate is already marked as IN_MEMORY_MODIFIED, then
            # that takes precedence.
            self._dirblock_state = DirState.IN_MEMORY_HASH_MODIFIED
    else:
        # TODO: Since we now have an IN_MEMORY_HASH_MODIFIED state, we
        #       should fail noisily if someone tries to set
        #       IN_MEMORY_MODIFIED but we don't have a write-lock!
        # We don't know exactly what changed so disable smart saving
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
    if header_modified:
        self._header_state = DirState.IN_MEMORY_MODIFIED

def _mark_unmodified(self):
    """Mark this dirstate as unmodified."""
    self._header_state = DirState.IN_MEMORY_UNMODIFIED
    self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
    self._known_hash_changes = set()
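
# Illustrative sketch (not part of the original module) of the state
# transitions performed by the two helpers above, for a DirState `state`:
#
#   state._mark_modified()                      # dirblocks -> IN_MEMORY_MODIFIED
#   state._mark_modified(header_modified=True)  # header    -> IN_MEMORY_MODIFIED too
#   state._mark_modified(hash_changed_entries=[entry])
#                                               # -> IN_MEMORY_HASH_MODIFIED,
#                                               #    unless already fully modified
#   state._mark_unmodified()                    # both back to IN_MEMORY_UNMODIFIED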

def add(self, path, file_id, kind, stat, fingerprint):
    """Add a path to be tracked.
def _check_delta_is_valid(self, delta):
    return list(inventory._check_delta_unique_ids(
                inventory._check_delta_unique_old_paths(
                inventory._check_delta_unique_new_paths(
                inventory._check_delta_ids_match_entry(
                inventory._check_delta_ids_are_valid(
                inventory._check_delta_new_path_entry_both_or_None(delta)))))))
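
# Illustrative sketch only (paths and ids are made up): an inventory delta is
# a list of (old_path, new_path, file_id, inventory_entry) tuples, e.g.
#
#   [(None, 'dir/new-file', 'new-file-id', <InventoryFile>),   # addition
#    ('old-file', None, 'old-file-id', None),                  # deletion
#    ('a/b', 'c/b', 'b-id', <InventoryFile for c/b>)]          # rename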
1355
1277
def update_by_delta(self, delta):
1356
1278
"""Apply an inventory delta to the dirstate for tree 0
1358
This is the workhorse for apply_inventory_delta in dirstate based
1361
1280
:param delta: An inventory delta. See Inventory.apply_delta for
1364
1283
self._read_dirblocks_if_needed()
1365
encode = cache_utf8.encode
1366
1284
insertions = {}
1368
# Accumulate parent references (path_utf8, id), to check for parentless
1369
# items or items placed under files/links/tree-references. We get
1370
# references from every item in the delta that is not a deletion and
1371
# is not itself the root.
1373
# Added ids must not be in the dirstate already. This set holds those
1376
# This loop transforms the delta to single atomic operations that can
1377
# be executed and validated.
1378
delta = sorted(self._check_delta_is_valid(delta), reverse=True)
1379
for old_path, new_path, file_id, inv_entry in delta:
1380
1287
if (file_id in insertions) or (file_id in removals):
self._raise_invalid(old_path or new_path, file_id,
    "repeated file id in delta %r" % (file_id,))
1383
1289
if old_path is not None:
1384
1290
old_path = old_path.encode('utf-8')
1385
1291
removals[file_id] = old_path
1387
new_ids.add(file_id)
1388
1292
if new_path is not None:
1389
if inv_entry is None:
1390
self._raise_invalid(new_path, file_id,
1391
"new_path with no entry")
1392
1293
new_path = new_path.encode('utf-8')
1393
dirname_utf8, basename = osutils.split(new_path)
1395
parents.add((dirname_utf8, inv_entry.parent_id))
1396
key = (dirname_utf8, basename, file_id)
1397
1296
minikind = DirState._kind_to_minikind[inv_entry.kind]
1398
1297
if minikind == 't':
1399
fingerprint = inv_entry.reference_revision or ''
1401
1300
fingerprint = ''
1402
1301
insertions[file_id] = (key, minikind, inv_entry.executable,
1411
1310
minikind = child[1][0][0]
1412
1311
fingerprint = child[1][0][4]
1413
1312
executable = child[1][0][3]
1414
old_child_path = osutils.pathjoin(child_dirname,
1416
1315
removals[child[0][2]] = old_child_path
1417
1316
child_suffix = child_dirname[len(old_path):]
1418
1317
new_child_dirname = (new_path + child_suffix)
1419
1318
key = (new_child_dirname, child_basename, child[0][2])
1420
new_child_path = osutils.pathjoin(new_child_dirname,
1422
1321
insertions[child[0][2]] = (key, minikind, executable,
1423
1322
fingerprint, new_child_path)
1424
self._check_delta_ids_absent(new_ids, delta, 0)
1426
self._apply_removals(removals.iteritems())
1427
self._apply_insertions(insertions.values())
1429
self._after_delta_check_parents(parents, 0)
1430
except errors.BzrError, e:
1431
self._changes_aborted = True
1432
if 'integrity error' not in str(e):
1434
# _get_entry raises BzrError when a request is inconsistent; we
1435
# want such errors to be shown as InconsistentDelta - and that
1436
# fits the behaviour we trigger.
1437
raise errors.InconsistentDeltaDelta(delta,
1438
"error from _get_entry. %s" % (e,))
1440
1326
def _apply_removals(self, removals):
1441
for file_id, path in sorted(removals, reverse=True,
1442
key=operator.itemgetter(1)):
1443
1328
dirname, basename = osutils.split(path)
1444
1329
block_i, entry_i, d_present, f_present = \
1445
1330
self._get_block_entry_index(dirname, basename, 0)
1447
entry = self._dirblocks[block_i][1][entry_i]
1449
self._raise_invalid(path, file_id,
1450
"Wrong path for old path.")
1451
if not f_present or entry[1][0][0] in 'ar':
1452
self._raise_invalid(path, file_id,
1453
"Wrong path for old path.")
1454
if file_id != entry[0][2]:
1455
self._raise_invalid(path, file_id,
1456
"Attempt to remove path has wrong id - found %r."
1458
1332
self._make_absent(entry)
1459
1333
# See if we have a malformed delta: deleting a directory must not
1460
1334
# leave crud behind. This increases the number of bisects needed
1526
1399
# At the same time, to reduce interface friction we convert the input
1527
1400
# inventory entries to dirstate.
1528
1401
root_only = ('', '')
1529
# Accumulate parent references (path_utf8, id), to check for parentless
1530
# items or items placed under files/links/tree-references. We get
1531
# references from every item in the delta that is not a deletion and
1532
# is not itself the root.
1534
# Added ids must not be in the dirstate already. This set holds those
1537
1402
for old_path, new_path, file_id, inv_entry in delta:
1538
if inv_entry is not None and file_id != inv_entry.file_id:
1539
self._raise_invalid(new_path, file_id,
1540
"mismatched entry file_id %r" % inv_entry)
1541
if new_path is None:
1542
new_path_utf8 = None
1544
if inv_entry is None:
1545
self._raise_invalid(new_path, file_id,
1546
"new_path with no entry")
1547
new_path_utf8 = encode(new_path)
1548
# note the parent for validation
1549
dirname_utf8, basename_utf8 = osutils.split(new_path_utf8)
1551
parents.add((dirname_utf8, inv_entry.parent_id))
1552
if old_path is None:
1553
old_path_utf8 = None
1555
old_path_utf8 = encode(old_path)
1556
if old_path is None:
1557
adds.append((None, new_path_utf8, file_id,
1558
1405
inv_to_entry(inv_entry), True))
1559
new_ids.add(file_id)
1560
1406
elif new_path is None:
1561
deletes.append((old_path_utf8, None, file_id, None, True))
1562
elif (old_path, new_path) == root_only:
1563
# change things in-place
1564
# Note: the case of a parent directory changing its file_id
1565
# tends to break optimizations here, because officially
1566
# the file has actually been moved, it just happens to
1567
# end up at the same path. If we can figure out how to
1568
# handle that case, we can avoid a lot of add+delete
1569
# pairs for objects that stay put.
1570
# elif old_path == new_path:
1571
changes.append((old_path_utf8, new_path_utf8, file_id,
1572
inv_to_entry(inv_entry)))
1407
deletes.append((encode(old_path), None, file_id, None, True))
1408
elif (old_path, new_path) != root_only:
1575
1410
# Because renames must preserve their children we must have
1576
1411
# processed all relocations and removes before hand. The sort
1585
1420
# for 'r' items on every pass.
1586
1421
self._update_basis_apply_deletes(deletes)
1423
new_path_utf8 = encode(new_path)
1588
1424
# Split into an add/delete pair recursively.
1589
adds.append((old_path_utf8, new_path_utf8, file_id,
1590
inv_to_entry(inv_entry), False))
1591
1427
# Expunge deletes that we've seen so that deleted/renamed
1592
1428
# children of a rename directory are handled correctly.
1593
new_deletes = reversed(list(
1594
self._iter_child_entries(1, old_path_utf8)))
1595
1431
# Remove the current contents of the tree at orig_path, and
1596
1432
# reinsert at the correct new path.
1597
1433
for entry in new_deletes:
1598
child_dirname, child_basename, child_file_id = entry[0]
1600
source_path = child_dirname + '/' + child_basename
1602
source_path = child_basename
1603
1438
if new_path_utf8:
1604
1439
target_path = new_path_utf8 + source_path[len(old_path):]
1606
1441
if old_path == '':
1607
1442
raise AssertionError("cannot rename directory to"
1609
1444
target_path = source_path[len(old_path) + 1:]
1610
1445
adds.append((None, target_path, entry[0][2], entry[1][1], False))
1611
1446
deletes.append(
1612
1447
(source_path, target_path, entry[0][2], None, False))
1613
deletes.append((old_path_utf8, new_path, file_id, None, False))
1614
self._check_delta_ids_absent(new_ids, delta, 1)
1616
# Finish expunging deletes/first half of renames.
1617
self._update_basis_apply_deletes(deletes)
1618
# Reinstate second half of renames and new paths.
1619
self._update_basis_apply_adds(adds)
1620
# Apply in-situ changes.
1621
self._update_basis_apply_changes(changes)
1623
self._after_delta_check_parents(parents, 1)
1624
except errors.BzrError, e:
1625
self._changes_aborted = True
1626
if 'integrity error' not in str(e):
1628
# _get_entry raises BzrError when a request is inconsistent; we
1629
# want such errors to be shown as InconsistentDelta - and that
1630
# fits the behaviour we trigger.
1631
raise errors.InconsistentDeltaDelta(delta,
1632
"error from _get_entry. %s" % (e,))
1634
self._mark_modified(header_modified=True)
1635
1465
self._id_index = None
1638
def _check_delta_ids_absent(self, new_ids, delta, tree_index):
1639
"""Check that none of the file_ids in new_ids are present in a tree."""
1642
id_index = self._get_id_index()
1643
for file_id in new_ids:
1644
for key in id_index.get(file_id, ()):
1645
block_i, entry_i, d_present, f_present = \
1646
self._get_block_entry_index(key[0], key[1], tree_index)
1648
# In a different tree
1650
entry = self._dirblocks[block_i][1][entry_i]
1651
if entry[0][2] != file_id:
1652
# Different file_id, so not what we want.
1654
self._raise_invalid(("%s/%s" % key[0:2]).decode('utf8'), file_id,
1655
"This file_id is new in the delta but already present in "
1658
def _raise_invalid(self, path, file_id, reason):
1659
self._changes_aborted = True
1660
raise errors.InconsistentDelta(path, file_id, reason)
1662
1468
def _update_basis_apply_adds(self, adds):
1663
1469
"""Apply a sequence of adds to tree 1 during update_basis_by_delta.
1673
1479
# Adds are accumulated partly from renames, so can be in any input
1674
1480
# order - sort it.
1675
# TODO: we may want to sort in dirblocks order. That way each entry
1676
# will end up in the same directory, allowing the _get_entry
1677
# fast-path for looking up 2 items in the same dir work.
1678
adds.sort(key=lambda x: x[1])
1679
1482
# adds is now in lexicographic order, which places all parents before
1680
1483
# their children, so we can process it linearly.
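# Illustrative note (not from the original source): a parent path is a strict
# prefix of its children's paths, so sorting the utf8 paths yields e.g.
# 'dir' < 'dir/sub' < 'dir/sub/file', and a renamed directory is therefore
# always inserted before anything inside it.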
1682
st = static_tuple.StaticTuple
1683
1485
for old_path, new_path, file_id, new_details, real_add in adds:
1684
dirname, basename = osutils.split(new_path)
1685
entry_key = st(dirname, basename, file_id)
1686
block_index, present = self._find_block_index_from_key(entry_key)
1688
self._raise_invalid(new_path, file_id,
1689
"Unable to find block for this record."
1690
" Was the parent added?")
1691
block = self._dirblocks[block_index][1]
1692
entry_index, present = self._find_entry_index(entry_key, block)
1694
if old_path is not None:
1695
self._raise_invalid(new_path, file_id,
1696
'considered a real add but still had old_path at %s'
1699
entry = block[entry_index]
1700
basis_kind = entry[1][1][0]
1701
if basis_kind == 'a':
1702
entry[1][1] = new_details
1703
elif basis_kind == 'r':
1704
raise NotImplementedError()
1706
self._raise_invalid(new_path, file_id,
1707
"An entry was marked as a new add"
1708
" but the basis target already existed")
1710
# The exact key was not found in the block. However, we need to
1711
# check if there is a key next to us that would have matched.
1712
# We only need to check 2 locations, because there are only 2
1714
for maybe_index in range(entry_index-1, entry_index+1):
1715
if maybe_index < 0 or maybe_index >= len(block):
1717
maybe_entry = block[maybe_index]
1718
if maybe_entry[0][:2] != (dirname, basename):
1719
# Just a random neighbor
1721
if maybe_entry[0][2] == file_id:
1722
raise AssertionError(
1723
'_find_entry_index didnt find a key match'
1724
' but walking the data did, for %s'
1726
basis_kind = maybe_entry[1][1][0]
1727
if basis_kind not in 'ar':
1728
self._raise_invalid(new_path, file_id,
1729
"we have an add record for path, but the path"
1730
" is already present with another file_id %s"
1731
% (maybe_entry[0][2],))
1733
entry = (entry_key, [DirState.NULL_PARENT_DETAILS,
1735
block.insert(entry_index, entry)
1737
active_kind = entry[1][0][0]
1738
if active_kind == 'a':
1739
# The active record shows up as absent, this could be genuine,
1740
# or it could be present at some other location. We need to
1742
id_index = self._get_id_index()
1743
# The id_index may not be perfectly accurate for tree1, because
1744
# we haven't been keeping it updated. However, it should be
1745
# fine for tree0, and that gives us enough info for what we
1747
keys = id_index.get(file_id, ())
1749
block_i, entry_i, d_present, f_present = \
1750
self._get_block_entry_index(key[0], key[1], 0)
1753
active_entry = self._dirblocks[block_i][1][entry_i]
1754
if (active_entry[0][2] != file_id):
1755
# Some other file is at this path, we don't need to
1758
real_active_kind = active_entry[1][0][0]
1759
if real_active_kind in 'ar':
1760
# We found a record, which was not *this* record,
1761
# which matches the file_id, but is not actually
1762
# present. Something seems *really* wrong.
1763
self._raise_invalid(new_path, file_id,
1764
"We found a tree0 entry that doesnt make sense")
1765
# Now, we've found a tree0 entry which matches the file_id
1766
# but is at a different location. So update them to be
1768
active_dir, active_name = active_entry[0][:2]
1770
active_path = active_dir + '/' + active_name
1772
active_path = active_name
1773
active_entry[1][1] = st('r', new_path, 0, False, '')
1774
entry[1][0] = st('r', active_path, 0, False, '')
1775
elif active_kind == 'r':
1776
raise NotImplementedError()
1778
new_kind = new_details[0]
1780
self._ensure_block(block_index, entry_index, new_path)
1782
1502
def _update_basis_apply_changes(self, changes):
1783
1503
"""Apply a sequence of changes to tree 1 during update_basis_by_delta.
1809
1535
null = DirState.NULL_PARENT_DETAILS
1810
1536
for old_path, new_path, file_id, _, real_delete in deletes:
1811
1537
if real_delete != (new_path is None):
1812
self._raise_invalid(old_path, file_id, "bad delete delta")
1538
raise AssertionError("bad delete delta")
1813
1539
# the entry for this file_id must be in tree 1.
1814
1540
dirname, basename = osutils.split(old_path)
1815
1541
block_index, entry_index, dir_present, file_present = \
1816
1542
self._get_block_entry_index(dirname, basename, 1)
1817
1543
if not file_present:
1818
self._raise_invalid(old_path, file_id,
1819
1546
'basis tree does not contain removed entry')
1820
1547
entry = self._dirblocks[block_index][1][entry_index]
1821
# The state of the entry in the 'active' WT
1822
active_kind = entry[1][0][0]
1823
1548
if entry[0][2] != file_id:
1824
self._raise_invalid(old_path, file_id,
1825
1551
'mismatched file_id in tree 1')
1827
old_kind = entry[1][1][0]
1828
if active_kind in 'ar':
1829
# The active tree doesn't have this file_id.
1830
# The basis tree is changing this record. If this is a
1831
# rename, then we don't want the record here at all
1832
# anymore. If it is just an in-place change, we want the
1833
# record here, but we'll add it if we need to. So we just
1835
if active_kind == 'r':
1836
active_path = entry[1][0][1]
1837
active_entry = self._get_entry(0, file_id, active_path)
1838
if active_entry[1][1][0] != 'r':
1839
self._raise_invalid(old_path, file_id,
1840
"Dirstate did not have matching rename entries")
1841
elif active_entry[1][0][0] in 'ar':
1842
self._raise_invalid(old_path, file_id,
1843
"Dirstate had a rename pointing at an inactive"
1845
active_entry[1][1] = null
1846
1558
del self._dirblocks[block_index][1][entry_index]
1848
# This was a directory, and the active tree says it
1849
# doesn't exist, and now the basis tree says it doesn't
1850
# exist. Remove its dirblock if present
1852
present) = self._find_block_index_from_key(
1855
dir_block = self._dirblocks[dir_block_index][1]
1857
# This entry is empty, go ahead and just remove it
1858
del self._dirblocks[dir_block_index]
1860
# There is still an active record, so just mark this
1863
block_i, entry_i, d_present, f_present = \
1864
self._get_block_entry_index(old_path, '', 1)
1866
dir_block = self._dirblocks[block_i][1]
1867
for child_entry in dir_block:
1868
child_basis_kind = child_entry[1][1][0]
1869
if child_basis_kind not in 'ar':
1870
self._raise_invalid(old_path, file_id,
1871
"The file id was deleted but its children were "
1874
def _after_delta_check_parents(self, parents, index):
1875
"""Check that parents required by the delta are all intact.
1877
:param parents: An iterable of (path_utf8, file_id) tuples which are
1878
required to be present in tree 'index' at path_utf8 with id file_id
1880
:param index: The column in the dirstate to check for parents in.
1882
for dirname_utf8, file_id in parents:
1883
# Get the entry - the ensures that file_id, dirname_utf8 exists and
1884
# has the right file id.
1885
entry = self._get_entry(index, file_id, dirname_utf8)
1886
if entry[1] is None:
1887
self._raise_invalid(dirname_utf8.decode('utf8'),
1888
file_id, "This parent is not present.")
1889
# Parents of things must be directories
1890
if entry[1][index][0] != 'd':
1891
self._raise_invalid(dirname_utf8.decode('utf8'),
1892
file_id, "This parent is not a directory.")
1894
1576
def _observed_sha1(self, entry, sha1, stat_value,
1895
1577
_stat_to_minikind=_stat_to_minikind, _pack_stat=pack_stat):
2327
2002
def _get_id_index(self):
"""Get an id index of self._dirblocks.

This maps from file_id => [(directory, name, file_id)] entries where
that file_id appears in one of the trees.
"""
2333
2004
if self._id_index is None:
2335
2006
for key, tree_details in self._iter_entries():
2336
self._add_to_id_index(id_index, key)
2337
2008
self._id_index = id_index
2338
2009
return self._id_index
2340
def _add_to_id_index(self, id_index, entry_key):
2341
"""Add this entry to the _id_index mapping."""
2342
# This code used to use a set for every entry in the id_index. However,
2343
# it is *rare* to have more than one entry. So a set is a large
2344
# overkill. And even when we do, we won't ever have more than the
2345
# number of parent trees. Which is still a small number (rarely >2). As
2346
# such, we use a simple tuple, and do our own uniqueness checks. While
2347
# the 'in' check is O(N) since N is nicely bounded it shouldn't ever
2348
# cause quadratic failure.
2349
file_id = entry_key[2]
2350
entry_key = static_tuple.StaticTuple.from_sequence(entry_key)
2351
if file_id not in id_index:
2352
id_index[file_id] = static_tuple.StaticTuple(entry_key,)
2354
entry_keys = id_index[file_id]
2355
if entry_key not in entry_keys:
2356
id_index[file_id] = entry_keys + (entry_key,)
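
# Illustrative sketch only (ids and paths are made up): after the helper above
# has run, id_index maps file_id -> tuple of entry keys (a StaticTuple in the
# real code), one key per location the id is referenced from across the trees:
#
#   {'file-id-1': (('dir', 'name', 'file-id-1'),),
#    'file-id-2': (('', 'other', 'file-id-2'),
#                  ('old-dir', 'other', 'file-id-2'))}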
2358
def _remove_from_id_index(self, id_index, entry_key):
2359
"""Remove this entry from the _id_index mapping.
2361
It is a programming error to call this when the entry_key is not
2364
file_id = entry_key[2]
2365
entry_keys = list(id_index[file_id])
2366
entry_keys.remove(entry_key)
2367
id_index[file_id] = static_tuple.StaticTuple.from_sequence(entry_keys)
2369
2011
def _get_output_lines(self, lines):
2370
2012
"""Format lines for final output.
2725
2324
new_details = []
2726
2325
for lookup_index in xrange(tree_index):
2727
2326
# boundary case: this is the first occurrence of file_id
2728
# so there are no id_indexes, possibly take this out of
2730
if not len(entry_keys):
2731
2330
new_details.append(DirState.NULL_PARENT_DETAILS)
2733
2332
# grab any one entry, use it to find the right path.
2734
a_key = iter(entry_keys).next()
2333
# TODO: optimise this to reduce memory use in highly
2334
# fragmented situations by reusing the relocation
2735
2337
if by_path[a_key][lookup_index][0] in ('r', 'a'):
# it's a pointer or missing statement, use it as is.
2738
2339
new_details.append(by_path[a_key][lookup_index])
2740
2341
# we have the right key, make a pointer to it.
2741
2342
real_path = ('/'.join(a_key[0:2])).strip('/')
new_details.append(st('r', real_path, 0, False, ''))
2744
2344
new_details.append(self._inv_entry_to_details(entry))
2745
2345
new_details.extend(new_location_suffix)
2746
2346
by_path[new_entry_key] = new_details
2747
self._add_to_id_index(id_index, new_entry_key)
2748
2348
# --- end generation of full tree mappings
2750
2350
# sort and output all the entries
2889
2462
and new_entry_key[1:] < current_old[0][1:])):
2890
2463
# new comes before:
2891
2464
# add a entry for this and advance new
2893
trace.mutter("Inserting from new '%s'.",
2894
new_path_utf8.decode('utf8'))
2895
2465
self.update_minimal(new_entry_key, current_new_minikind,
2896
2466
executable=current_new[1].executable,
2897
path_utf8=new_path_utf8, fingerprint=fingerprint,
2467
path_utf8=new_path_utf8, fingerprint=fingerprint)
2899
2468
current_new = advance(new_iterator)
2901
2470
# we've advanced past the place where the old key would be,
2902
2471
# without seeing it in the new list. so it must be gone.
2904
trace.mutter("Deleting from old '%s/%s'.",
2905
current_old[0][0].decode('utf8'),
2906
current_old[0][1].decode('utf8'))
2907
2472
self._make_absent(current_old)
2908
2473
current_old = advance(old_iterator)
2909
self._mark_modified()
2910
2475
self._id_index = None
2911
2476
self._packed_stat_index = None
2913
trace.mutter("set_state_from_inventory complete.")
2915
def set_state_from_scratch(self, working_inv, parent_trees, parent_ghosts):
2916
"""Wipe the currently stored state and set it to something new.
2918
This is a hard-reset for the data we are working with.
2920
# Technically, we really want a write lock, but until we write, we
2921
# don't really need it.
2922
self._requires_lock()
2923
# root dir and root dir contents with no children. We have to have a
2924
# root for set_state_from_inventory to work correctly.
2925
empty_root = (('', '', inventory.ROOT_ID),
2926
[('d', '', 0, False, DirState.NULLSTAT)])
2927
empty_tree_dirblocks = [('', [empty_root]), ('', [])]
2928
self._set_data([], empty_tree_dirblocks)
2929
self.set_state_from_inventory(working_inv)
2930
self.set_parent_trees(parent_trees, parent_ghosts)
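
# Illustrative usage sketch only (the `working_inv` name is an assumption):
# given an inventory for the working tree and a write-locked dirstate `state`,
#
#   state.set_state_from_scratch(working_inv, parent_trees=[],
#                                parent_ghosts=[])
#
# wipes the stored rows and repopulates them from that inventory.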
2932
2478
def _make_absent(self, current_old):
2933
2479
"""Mark current_old - an entry - as absent for tree 0.
3043
2569
# grab one of them and use it to generate parent
3044
2570
# relocation/absent entries.
3045
2571
new_entry = key, [new_details]
3046
# existing_keys can be changed as we iterate.
3047
for other_key in tuple(existing_keys):
2572
for other_key in existing_keys:
3048
2573
# change the record at other to be a pointer to this new
3049
2574
# record. The loop looks similar to the change to
3050
2575
# relocations when updating an existing record but its not:
3051
2576
# the test for existing kinds is different: this can be
3052
2577
# factored out to a helper though.
3053
other_block_index, present = self._find_block_index_from_key(
3056
raise AssertionError('could not find block for %s' % (
3058
other_block = self._dirblocks[other_block_index][1]
3059
other_entry_index, present = self._find_entry_index(
3060
other_key, other_block)
3062
raise AssertionError(
3063
'update_minimal: could not find other entry for %s'
3065
2585
if path_utf8 is None:
3066
2586
raise AssertionError('no path')
3067
# Turn this other location into a reference to the new
3068
# location. This also updates the aliased iterator
3069
# (current_old in set_state_from_inventory) so that the old
3070
# entry, if not already examined, is skipped over by that
3072
other_entry = other_block[other_entry_index]
3073
other_entry[1][0] = ('r', path_utf8, 0, False, '')
3074
if self._maybe_remove_row(other_block, other_entry_index,
3076
# If the row holding this was removed, we need to
3077
# recompute where this entry goes
3078
entry_index, _ = self._find_entry_index(key, block)
3081
# adds a tuple to the new details for each column
3082
# - either by copying an existing relocation pointer inside that column
3083
# - or by creating a new pointer to the right row inside that column
3084
2590
num_present_parents = self._num_present_parents()
3085
if num_present_parents:
3086
# TODO: This re-evaluates the existing_keys set, do we need
3087
# to do that ourselves?
3088
other_key = list(existing_keys)[0]
3089
2591
for lookup_index in xrange(1, num_present_parents + 1):
3090
2592
# grab any one entry, use it to find the right path.
3091
2593
# TODO: optimise this to reduce memory use in highly
3475
2937
entry[1][0] = ('l', '', stat_value.st_size,
3476
2938
False, DirState.NULLSTAT)
3478
state._mark_modified([entry])
3479
2940
return link_or_sha1
2941
update_entry = py_update_entry
3482
2944
class ProcessEntryPython(object):
3484
__slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id",
3485
2947
"last_source_parent", "last_target_parent", "include_unchanged",
3486
"partial", "use_filesystem_for_exec", "utf8_decode",
3487
"searched_specific_files", "search_specific_files",
3488
"searched_exact_paths", "search_specific_file_parents", "seen_ids",
3489
"state", "source_index", "target_index", "want_unversioned", "tree"]
3491
2952
def __init__(self, include_unchanged, use_filesystem_for_exec,
3492
2953
search_specific_files, state, source_index, target_index,
3493
2954
want_unversioned, tree):
3494
2955
self.old_dirname_to_file_id = {}
3495
2956
self.new_dirname_to_file_id = {}
3496
# Are we doing a partial iter_changes?
3497
self.partial = search_specific_files != set([''])
2957
# Just a sentry, so that _process_entry can say that this
2958
# record is handled, but isn't interesting to process (unchanged)
2959
self.uninteresting = object()
3498
2960
# Using a list so that we can access the values and change them in
3499
2961
# nested scope. Each one is [path, file_id, entry]
3500
2962
self.last_source_parent = [None, None]
4156
3577
current_dir_info = dir_iterator.next()
4157
3578
except StopIteration:
4158
3579
current_dir_info = None
4159
for result in self._iter_specific_file_parents():
4162
def _iter_specific_file_parents(self):
4163
"""Iter over the specific file parents."""
4164
while self.search_specific_file_parents:
4165
# Process the parent directories for the paths we were iterating.
4166
# Even in extremely large trees this should be modest, so currently
4167
# no attempt is made to optimise.
4168
path_utf8 = self.search_specific_file_parents.pop()
4169
if osutils.is_inside_any(self.searched_specific_files, path_utf8):
4170
# We've examined this path.
4172
if path_utf8 in self.searched_exact_paths:
4173
# We've examined this path.
4175
path_entries = self.state._entries_for_path(path_utf8)
4176
# We need either one or two entries. If the path in
4177
# self.target_index has moved (so the entry in source_index is in
4178
# 'ar') then we need to also look for the entry for this path in
4179
# self.source_index, to output the appropriate delete-or-rename.
4180
selected_entries = []
4182
for candidate_entry in path_entries:
4183
# Find entries present in target at this path:
4184
if candidate_entry[1][self.target_index][0] not in 'ar':
4186
selected_entries.append(candidate_entry)
4187
# Find entries present in source at this path:
4188
elif (self.source_index is not None and
4189
candidate_entry[1][self.source_index][0] not in 'ar'):
4191
if candidate_entry[1][self.target_index][0] == 'a':
4192
# Deleted, emit it here.
4193
selected_entries.append(candidate_entry)
4195
# renamed, emit it when we process the directory it
4197
self.search_specific_file_parents.add(
4198
candidate_entry[1][self.target_index][1])
4200
raise AssertionError(
4201
"Missing entry for specific path parent %r, %r" % (
4202
path_utf8, path_entries))
4203
path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
4204
for entry in selected_entries:
4205
if entry[0][2] in self.seen_ids:
4207
result, changed = self._process_entry(entry, path_info)
4209
raise AssertionError(
4210
"Got entry<->path mismatch for specific path "
4211
"%r entry %r path_info %r " % (
4212
path_utf8, entry, path_info))
4213
# Only include changes - we're outside the users requested
4216
self._gather_result_for_consistency(result)
4217
if (result[6][0] == 'directory' and
4218
result[6][1] != 'directory'):
4219
# This stopped being a directory, the old children have
4221
if entry[1][self.source_index][0] == 'r':
4222
# renamed, take the source path
4223
entry_path_utf8 = entry[1][self.source_index][1]
4225
entry_path_utf8 = path_utf8
4226
initial_key = (entry_path_utf8, '', '')
4227
block_index, _ = self.state._find_block_index_from_key(
4229
if block_index == 0:
4230
# The children of the root are in block index 1.
4232
current_block = None
4233
if block_index < len(self.state._dirblocks):
4234
current_block = self.state._dirblocks[block_index]
4235
if not osutils.is_inside(
4236
entry_path_utf8, current_block[0]):
4237
# No entries for this directory at all.
4238
current_block = None
4239
if current_block is not None:
4240
for entry in current_block[1]:
4241
if entry[1][self.source_index][0] in 'ar':
4242
# Not in the source tree, so doesn't have to be
4245
# Path of the entry itself.
4247
self.search_specific_file_parents.add(
4248
osutils.pathjoin(*entry[0][:2]))
4249
if changed or self.include_unchanged:
4251
self.searched_exact_paths.add(path_utf8)
4253
def _path_info(self, utf8_path, unicode_path):
4254
"""Generate path_info for unicode_path.
4256
:return: None if unicode_path does not exist, or a path_info tuple.
4258
abspath = self.tree.abspath(unicode_path)
4260
stat = os.lstat(abspath)
4262
if e.errno == errno.ENOENT:
4263
# the path does not exist.
4267
utf8_basename = utf8_path.rsplit('/', 1)[-1]
4268
dir_info = (utf8_path, utf8_basename,
4269
osutils.file_kind_from_stat_mode(stat.st_mode), stat,
4271
if dir_info[2] == 'directory':
4272
if self.tree._directory_is_tree_reference(
4274
self.root_dir_info = self.root_dir_info[:2] + \
4275
('tree-reference',) + self.root_dir_info[3:]
3580
_process_entry = ProcessEntryPython
4279
3583
# Try to load the compiled form if possible
4281
from bzrlib._dirstate_helpers_pyx import (
_read_dirblocks,
bisect_dirblock,
_bisect_path_left,
_bisect_path_right,
cmp_by_dirs,
4287
3591
ProcessEntryC as _process_entry,
4288
3592
update_entry as update_entry,
4290
except ImportError, e:
4291
osutils.failed_to_load_extension(e)
4292
3595
from bzrlib._dirstate_helpers_py import (
3596
_read_dirblocks_py as _read_dirblocks,
3597
bisect_dirblock_py as bisect_dirblock,
3598
_bisect_path_left_py as _bisect_path_left,
3599
_bisect_path_right_py as _bisect_path_right,
3600
cmp_by_dirs_py as cmp_by_dirs,
4299
# FIXME: It would be nice to be able to track moved lines so that the
4300
# corresponding python code can be moved to the _dirstate_helpers_py
4301
# module. I don't want to break the history for this important piece of
4302
# code so I left the code here -- vila 20090622
4303
update_entry = py_update_entry
4304
_process_entry = ProcessEntryPython